Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix get_avail_memory for cgroups v1/v2 (AB#24354) #13

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ jobs:
run: rpmlint Lmod-config.spec

- name: Run luacheck
uses: lunarmodules/luacheck@v0
uses: lunarmodules/luacheck@v1
with:
args: -g .
4 changes: 3 additions & 1 deletion Lmod-config.spec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Summary: Sitepackage and other config files for Lmod
Name: Lmod-config
Version: 1.9
Version: 1.10
Release: 1
License: GPL
Group: Applications/System
Expand Down Expand Up @@ -40,6 +40,8 @@ exit 0
%{_libexecdir}/lmod/run_lmod_cache.py

%changelog
* Fri Jan 17 2025 Alex Domingo <[email protected]>
- Fix get_avail_memory for cgroups v1/v2
* Mon Jan 06 2025 Cintia Willemyns <[email protected]>
- Hide modules older than module_age 6, instead of 5
* Wed Sep 25 2024 Samuel Moors <[email protected]>
Expand Down
33 changes: 24 additions & 9 deletions SitePackage.lua
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,29 @@ end
local function get_avail_memory()
-- If a limit is set, return the maximum allowed memory, else nil

-- look for the memory cgroup (if any)
local cgroup = nil
-- look for the memory cgroup (if any):
-- for cgroupv2: find the hierarchy and the memory controller
-- for cgroupv1: look for the memory controller
local lines = {}
for line in io.lines("/proc/self/cgroup") do
cgroup = line:match("^[0-9]+:memory:(.*)$")
if cgroup then
break
table.insert(lines, line)
end

local cgroup = nil
local memory_filepath
-- if it's one line: cgroupv2
if #lines == 1 then
cgroup = lines[1]:match("^[0-9]+::(.*)$")
memory_filepath = "/sys/fs/cgroup/_CGROUP_SET_/memory.max"

else
for _, line in ipairs(lines) do
cgroup = line:match("^[0-9]+:memory:(.*)$")
if cgroup then
break
end
end
memory_filepath = "/sys/fs/cgroup/memory/_CGROUP_SET_/memory.memsw.limit_in_bytes"
end

if not cgroup then
Expand All @@ -273,13 +289,12 @@ local function get_avail_memory()
-- Slurm tasks are only limited by the job step that launched them
cgroup = cgroup:gsub("/task_[%d]+$", "")

-- read the current maximum allowed memory usage (memory + swap)
local memory_file = io.open("/sys/fs/cgroup/memory/" .. cgroup .. "/memory.memsw.limit_in_bytes")

-- read the current maximum allowed memory usage (memory)
memory_filepath = memory_filepath:gsub("_CGROUP_SET_", cgroup)
local memory_file = io.open(memory_filepath)
if not memory_file then
return nil
end

local memory_value = tonumber(memory_file:read())
memory_file:close()

Expand Down
Loading