Skip to content

Commit

Permalink
Merge pull request #91 from tud-zih-energy/marenz.fix-cpuset-issue
Browse files Browse the repository at this point in the history
Fix thread affinity/detected number of threads with some disabled in a cgroup.
  • Loading branch information
marenz2569 authored Dec 5, 2024
2 parents aa10980 + a839c00 commit 3791fb7
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 41 deletions.
51 changes: 31 additions & 20 deletions include/firestarter/Environment/CPUTopology.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ extern "C" {

namespace firestarter::environment {

/// Summary of the hardware threads found on the system. The Environment class consults these values when it decides
/// how many threads to run and when it sets up or validates the cpu binding.
struct HardwareThreadsInfo {
  HardwareThreadsInfo() = default;

  /// Count of hardware threads on this system.
  unsigned MaxNumThreads{0};
  /// Largest physical (os) index of any hardware thread on this system.
  unsigned MaxPhysicalIndex{0};
};

/// This class models the properties of a processor.
class CPUTopology {
public:
Expand All @@ -42,35 +53,18 @@ class CPUTopology {

friend auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopologyRef) -> std::ostream&;

/// The total number of hardware threads.
[[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; }
/// The maximum os_index of all PUs plus 1 if we cannot determine the number of cpu kinds. Otherwise the maximum
/// number of PUs.
[[nodiscard]] auto maxNumThreads() const -> unsigned;
/// The number of threads per core, assuming every core has the same number of threads.
[[nodiscard]] auto numThreadsPerCore() const -> unsigned { return NumThreadsPerCore; }
/// The total number of cores.
[[nodiscard]] auto numCoresTotal() const -> unsigned { return NumCoresTotal; }
/// The total number of packages.
[[nodiscard]] auto numPackages() const -> unsigned { return NumPackages; }
/// The CPU architecture e.g., x86_64
[[nodiscard]] auto architecture() const -> std::string const& { return Architecture; }
/// The CPU vendor i.e., Intel or AMD.
[[nodiscard]] virtual auto vendor() const -> std::string const& { return Vendor; }
/// The processor name, this includes the vendor specific name
[[nodiscard]] virtual auto processorName() const -> std::string const& { return ProcessorName; }
/// The model of the processor. With X86 this is the string of Family, Model and Stepping.
[[nodiscard]] virtual auto model() const -> std::string const& = 0;

/// Get the properties about the hardware threads.
[[nodiscard]] auto hardwareThreadsInfo() const -> HardwareThreadsInfo;

/// Getter for the L1i-cache size in bytes
[[nodiscard]] auto instructionCacheSize() const -> const auto& { return InstructionCacheSize; }

/// Getter for the clockrate in Hz
[[nodiscard]] virtual auto clockrate() const -> uint64_t { return Clockrate; }

/// Getter for the list of CPU features
[[nodiscard]] virtual auto features() const -> std::list<std::string> const& = 0;

/// Get the current hardware timestamp
[[nodiscard]] virtual auto timestamp() const -> uint64_t = 0;

Expand All @@ -85,6 +79,23 @@ class CPUTopology {
[[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> std::optional<unsigned>;

protected:
/// The total number of hardware threads.
[[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; }
/// The total number of cores.
[[nodiscard]] auto numCoresTotal() const -> unsigned { return NumCoresTotal; }
/// The total number of packages.
[[nodiscard]] auto numPackages() const -> unsigned { return NumPackages; }
/// The CPU architecture e.g., x86_64
[[nodiscard]] auto architecture() const -> std::string const& { return Architecture; }
/// The CPU vendor i.e., Intel or AMD.
[[nodiscard]] virtual auto vendor() const -> std::string const& { return Vendor; }
/// The processor name, this includes the vendor specific name
[[nodiscard]] virtual auto processorName() const -> std::string const& { return ProcessorName; }
/// The model of the processor. With X86 this is the string of Family, Model and Stepping.
[[nodiscard]] virtual auto model() const -> std::string const& = 0;
/// Getter for the list of CPU features
[[nodiscard]] virtual auto features() const -> std::list<std::string> const& = 0;

/// Read the scaling_governor file of cpu0 on linux and return the contents as a string.
[[nodiscard]] static auto scalingGovernor() -> std::string;

Expand Down
44 changes: 30 additions & 14 deletions src/firestarter/Environment/CPUTopology.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,45 +365,61 @@ auto CPUTopology::getPkgIdFromPU(unsigned Pu) const -> std::optional<unsigned> {
return {};
}

// NOTE: this span is a rendered diff hunk. Lines of the removed maxNumThreads() and of the added
// hardwareThreadsInfo() are interleaved; it is not a single compilable function body.
auto CPUTopology::maxNumThreads() const -> unsigned {
unsigned Max = 0;
// Collects the number of hardware threads and the highest PU os index from the hwloc topology.
// If hwloc reports no cpukinds, every PU object is visited instead; otherwise the cpuset of each cpukind is
// accumulated. Returns the result as a HardwareThreadsInfo.
auto CPUTopology::hardwareThreadsInfo() const -> HardwareThreadsInfo {
HardwareThreadsInfo Infos;

// There might be more than one kind of cores
// Get the number of different kinds of CPUs
const auto NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);

// fallback in case this did not work ... can happen on some platforms
// already printed a warning earlier
if (NrCpukinds < 1) {
if (NrCpukinds < 0) {
log::fatal() << "flags to hwloc_cpukinds_get_nr is invalid. This is not expected.";
}

// No information about the cpukinds found. Go through all PUs and save the biggest os index.
if (NrCpukinds == 0) {
auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU);
unsigned Max = 0;
// The thread count is the number of PU objects hwloc knows about.
Infos.MaxNumThreads = Width;

for (int I = 0; I < Width; I++) {
auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I);
Max = (std::max)(Max, Obj->os_index);
Infos.MaxPhysicalIndex = (std::max)(Infos.MaxPhysicalIndex, Obj->os_index);
}

return Max + 1;
return Infos;
}

// Allocate bitmap to get CPUs later
hwloc_bitmap_t Bitmap = hwloc_bitmap_alloc();
if (Bitmap == nullptr) {
log::error() << "Could not allocate memory for CPU bitmap";
return 1;
// Error should abort, otherwise return zero.
log::fatal() << "Could not allocate memory for CPU bitmap";
return Infos;
}

// Find CPUs per kind
// Go through all cpukinds and save the biggest os index.
for (int KindIndex = 0; KindIndex < NrCpukinds; KindIndex++) {
const auto Result = hwloc_cpukinds_get_info(Topology, KindIndex, Bitmap, nullptr, nullptr, nullptr, 0);
// NOTE(review): a failure is only logged and the loop carries on, so the weight/last-index code below still runs
// on whatever Bitmap contains at that point (possibly the previous kind's cpuset) — confirm this is intended.
if (Result) {
log::warn() << "Could not get information for CPU kind " << KindIndex;
}
Max += hwloc_bitmap_weight(Bitmap);

// NOTE(review): per the hwloc bitmap documentation, hwloc_bitmap_weight returns -1 when the bitmap is infinitely
// set; the message below seems to state the opposite — TODO confirm and reword.
auto Weight = hwloc_bitmap_weight(Bitmap);
if (Weight < 0) {
log::fatal() << "bitmap is full or bitmap is not infinitely set";
}

// NOTE(review): per the hwloc bitmap documentation, hwloc_bitmap_last returns -1 when the bitmap is empty or
// infinitely set; the message below does not describe that — TODO confirm and reword.
auto MaxIndex = hwloc_bitmap_last(Bitmap);
if (MaxIndex < 0) {
log::fatal() << "bitmap is full or bitmap is not infinitely set";
}

// Thread counts are summed across kinds; the physical index keeps the maximum seen over all kinds.
Infos.MaxNumThreads += Weight;
Infos.MaxPhysicalIndex = (std::max)(Infos.MaxPhysicalIndex, static_cast<unsigned>(MaxIndex));
}

hwloc_bitmap_free(Bitmap);

return Max;
return Infos;
}

}; // namespace firestarter::environment
14 changes: 7 additions & 7 deletions src/firestarter/Environment/Environment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const {
if (cpuAllowed(Cpu)) {
CPU_SET(Cpu, &Mask);
} else {
if (Cpu >= topology().numThreads()) {
if (Cpu > topology().hardwareThreadsInfo().MaxPhysicalIndex) {
throw std::invalid_argument("The given bind argument (-b/--bind) includes CPU " + std::to_string(Cpu) +
" that is not available on this system.");
}
Expand All @@ -73,7 +73,7 @@ void Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const {
#endif

void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) {
if (RequestedNumThreads > 0 && RequestedNumThreads > topology().numThreads()) {
if (RequestedNumThreads > 0 && RequestedNumThreads > topology().hardwareThreadsInfo().MaxNumThreads) {
log::warn() << "Not enough CPUs for requested number of threads";
}

Expand All @@ -87,7 +87,7 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s

// use all CPUs if not defined otherwise
if (RequestedNumThreads == 0) {
for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
for (unsigned I = 0; I <= topology().hardwareThreadsInfo().MaxPhysicalIndex; I++) {
if (cpuAllowed(I)) {
CPU_SET(I, &Cpuset);
RequestedNumThreads++;
Expand All @@ -96,7 +96,7 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
} else {
// if -n / --threads is set
unsigned CpuCount = 0;
for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
for (unsigned I = 0; I <= topology().hardwareThreadsInfo().MaxPhysicalIndex; I++) {
// skip if cpu is not available
if (!cpuAllowed(I)) {
continue;
Expand Down Expand Up @@ -165,7 +165,7 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
}

// Save the ids of the threads.
for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
for (unsigned I = 0; I <= topology().hardwareThreadsInfo().MaxPhysicalIndex; I++) {
if (CPU_ISSET(I, &Cpuset)) {
this->CpuBind.push_back(I);
}
Expand All @@ -174,12 +174,12 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
(void)CpuBind;

if (RequestedNumThreads == 0) {
RequestedNumThreads = topology().maxNumThreads();
RequestedNumThreads = topology().hardwareThreadsInfo().MaxNumThreads;
}
#endif

// Limit the number of thread to the maximum on the CPU.
this->RequestedNumThreads = (std::min)(RequestedNumThreads, topology().maxNumThreads());
this->RequestedNumThreads = (std::min)(RequestedNumThreads, topology().hardwareThreadsInfo().MaxNumThreads);
}

void Environment::printThreadSummary() {
Expand Down

0 comments on commit 3791fb7

Please sign in to comment.