Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Give warning if runpath disk space is close to full on ert startup #9193

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/ert/config/model_config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from __future__ import annotations

import contextlib
import logging
import os.path
import shutil
from datetime import datetime
from typing import no_type_check

from pydantic import field_validator
from pydantic.dataclasses import dataclass

from ert.shared.status.utils import byte_with_unit, get_mount_directory

from .parsing import (
ConfigDict,
ConfigKeys,
Expand Down Expand Up @@ -43,6 +47,11 @@ def str_to_datetime(date_str: str) -> datetime:
DEFAULT_JOBNAME_FORMAT = "<CONFIG_FILE>-<IENS>"
DEFAULT_ECLBASE_FORMAT = "ECLBASE<IENS>"

# Thresholds for the low-disk-space warning given while validating the runpath.
# The warning is issued only when BOTH conditions hold:
#   * the used fraction of the disk (used / total) is greater than
#     FULL_DISK_PERCENTAGE_THRESHOLD, and
#   * the free space in bytes is less than MINIMUM_BYTES_LEFT_ON_DISK_THRESHOLD.
# Despite its name, FULL_DISK_PERCENTAGE_THRESHOLD is a fraction (0.97 == 97 %),
# since it is compared directly against used / total.
FULL_DISK_PERCENTAGE_THRESHOLD = 0.97
MINIMUM_BYTES_LEFT_ON_DISK_THRESHOLD = 200 * 1000**3 # 200 GB (decimal gigabytes)


@dataclass
class ModelConfig:
Expand Down Expand Up @@ -83,6 +92,19 @@ def validate_runpath(cls, runpath_format_string: str) -> str:
)
ConfigWarning.warn(msg)
logger.warning(msg)
with contextlib.suppress(Exception):
mount_dir = get_mount_directory(runpath_format_string)
total_space, used_space, free_space = shutil.disk_usage(mount_dir)
percentage_used = used_space / total_space
if (
percentage_used > FULL_DISK_PERCENTAGE_THRESHOLD
and free_space < MINIMUM_BYTES_LEFT_ON_DISK_THRESHOLD
):
msg = (
f"Low disk space: {byte_with_unit(free_space)} free on {mount_dir !s}."
" Consider freeing up some space to ensure successful simulation runs."
)
ConfigWarning.warn(msg)
return result

@field_validator("jobname_format_string", mode="before")
Expand Down
10 changes: 9 additions & 1 deletion src/ert/gui/simulation/run_dialog.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,12 @@
byte_with_unit,
file_has_content,
format_running_time,
get_mount_directory,
)

from ..find_ert_info import find_ert_info
from .queue_emitter import QueueEmitter
from .view import ProgressWidget, RealizationWidget, UpdateWidget
from .view import DiskSpaceWidget, ProgressWidget, RealizationWidget, UpdateWidget

_TOTAL_PROGRESS_TEMPLATE = "Total progress {total_progress}% — {iteration_label}"

Expand Down Expand Up @@ -195,6 +196,8 @@ def __init__(
self._ticker = QTimer(self)
self._ticker.timeout.connect(self._on_ticker)

self.run_path_mp = get_mount_directory(run_model.run_paths._runpath_format)

self._total_progress_label = QLabel(
_TOTAL_PROGRESS_TEMPLATE.format(
total_progress=0, iteration_label="Starting..."
Expand All @@ -220,6 +223,7 @@ def __init__(

self.running_time = QLabel("")
self.memory_usage = QLabel("")
self.disk_space = DiskSpaceWidget()

self.kill_button = QPushButton("Terminate experiment")
self.restart_button = QPushButton("Rerun failed")
Expand All @@ -245,6 +249,8 @@ def __init__(
button_layout.addStretch()
button_layout.addWidget(self.memory_usage)
button_layout.addStretch()
button_layout.addWidget(self.disk_space)
button_layout.addStretch()
button_layout.addWidget(self.copy_debug_info_button)
button_layout.addWidget(self.kill_button)
button_layout.addWidget(self.restart_button)
Expand Down Expand Up @@ -425,6 +431,8 @@ def _on_ticker(self) -> None:

maximum_memory_usage = self._snapshot_model.root.max_memory_usage

self.disk_space.update_status(self.run_path_mp)

if maximum_memory_usage:
self.memory_usage.setText(
f"Maximal realization memory usage: {byte_with_unit(maximum_memory_usage)}"
Expand Down
3 changes: 2 additions & 1 deletion src/ert/gui/simulation/view/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .disk_space_widget import DiskSpaceWidget
from .progress_widget import ProgressWidget
from .realization import RealizationWidget
from .update import UpdateWidget

__all__ = ["ProgressWidget", "RealizationWidget", "UpdateWidget"]
__all__ = ["DiskSpaceWidget", "ProgressWidget", "RealizationWidget", "UpdateWidget"]
65 changes: 65 additions & 0 deletions src/ert/gui/simulation/view/disk_space_widget.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from pathlib import Path

from qtpy.QtCore import Qt
from qtpy.QtWidgets import QHBoxLayout, QLabel, QProgressBar, QWidget

from ert.shared.status.utils import disk_space_status


class DiskSpaceWidget(QWidget):
    """Horizontal strip showing disk usage for the runpath's filesystem.

    From left to right it holds a caption label, a fixed-width progress bar
    with the used percentage, and a label with the remaining free space.
    """

    def __init__(self, parent: QWidget | None = None) -> None:
        super().__init__(parent)

        row = QHBoxLayout(self)
        row.setContentsMargins(0, 0, 0, 0)
        row.setSpacing(10)

        # Caption (left) and free-space text (right)
        self.usage_label = QLabel(self)
        self.space_left_label = QLabel(self)

        # Percentage bar, 0-100, with its text centred on the bar
        bar = QProgressBar(self)
        bar.setRange(0, 100)
        bar.setTextVisible(True)
        bar.setFixedWidth(100)
        bar.setAlignment(Qt.AlignCenter)  # type: ignore
        self.progress_bar = bar

        for child in (self.usage_label, self.progress_bar, self.space_left_label):
            row.addWidget(child)

    def update_status(self, mount_dir: Path) -> None:
        """Refresh the caption, bar and free-space text for ``mount_dir``.

        The whole widget is hidden when ``disk_space_status`` returns
        ``None`` and shown again once a status is available.
        """
        status = disk_space_status(mount_dir)
        if status is None:
            self.setVisible(False)
            return

        percent_used, free_text = status
        # The bar value is the truncated integer percentage; the bar text
        # keeps one decimal.
        whole_percent = int(percent_used)

        self.usage_label.setText("Disk space runpath:")
        self.progress_bar.setValue(whole_percent)
        self.progress_bar.setFormat(f"{percent_used:.1f}%")

        # Red from 90 %, yellow from 70 %, green below that.
        color = (
            "#e74c3c"
            if whole_percent >= 90
            else "#f1c40f"
            if whole_percent >= 70
            else "#2ecc71"
        )

        self.progress_bar.setStyleSheet(f"""
                QProgressBar {{
                    border: 1px solid #ccc;
                    border-radius: 2px;
                    text-align: center;
                }}

                QProgressBar::chunk {{
                    background-color: {color};
                }}
            """)

        self.space_left_label.setText(f"{free_text} free")

        self.setVisible(True)
20 changes: 20 additions & 0 deletions src/ert/shared/status/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import contextlib
import math
import os
import resource
import shutil
import sys
from pathlib import Path


def byte_with_unit(byte_count: float) -> str:
Expand Down Expand Up @@ -82,3 +85,20 @@ def get_ert_memory_usage() -> int:
rss_scale = 1000

return usage.ru_maxrss // rss_scale


def disk_space_status(mount_dir: Path) -> tuple[float, str] | None:
with contextlib.suppress(Exception):
disk_info = shutil.disk_usage(mount_dir)
percentage_used = (disk_info.used / disk_info.total) * 100
return percentage_used, byte_with_unit(disk_info.free)
return None


def get_mount_directory(runpath: str) -> Path:
    """Return the closest ancestor of ``runpath`` that is a mount point.

    ``runpath`` is made absolute (relative to the current working directory)
    and then walked upwards, parent by parent, until ``Path.is_mount()``
    reports a mount point.

    Args:
        runpath: Path or runpath format string; the walk relies only on
            ``Path.is_mount()``, so nothing here requires it to exist.

    Returns:
        The first ancestor (or the path itself) for which ``is_mount()`` is
        true. If no ancestor qualifies, the top-most path is returned
        instead of looping forever.
    """
    path = Path(runpath).absolute()

    while not path.is_mount():
        parent = path.parent
        if parent == path:
            # Reached the top of the hierarchy without is_mount() ever
            # being true; stop here, as path.parent would never change again.
            break
        path = parent

    return path
56 changes: 55 additions & 1 deletion tests/ert/unit_tests/config/test_model_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from pathlib import Path
from unittest.mock import patch

import pytest

from ert.config import ModelConfig
from ert.config.parsing import ConfigKeys, ConfigValidationError
from ert.config.parsing import ConfigKeys, ConfigValidationError, ConfigWarning


def test_default_model_config_run_path(tmpdir):
Expand Down Expand Up @@ -61,3 +64,54 @@ def test_that_invalid_time_map_file_raises_config_validation_error(tmpdir):

with pytest.raises(ConfigValidationError, match="Could not read timemap file"):
_ = ModelConfig.from_dict({ConfigKeys.TIME_MAP: "time_map.txt"})


@pytest.mark.parametrize(
    "total_space, used_space, to_warn, expected_warning",
    [
        pytest.param(
            10 * 1000**4,  # 10 TB
            9.75 * 1000**4,  # 9.75 TB
            False,
            None,
            id="Low disk space percentage on large disk",
        ),
        pytest.param(
            100 * 1000**3,  # 100 GB
            99 * 1000**3,  # 99 GB
            True,
            "Low disk space: 1.00 GB free on",
            id="Low disk space small disk",
        ),
        pytest.param(
            10 * 1000**5,  # 10 PB
            9.99994 * 1000**5,  # 9.99994 PB
            True,
            "Low disk space: 60.00 GB free on",
            # Distinct id: this is the large-disk case, not a second
            # "small disk" case.
            id="Low disk space large disk",
        ),
        pytest.param(
            100 * 1000**3,  # 100 GB
            75 * 1000**3,  # 75 GB
            False,
            None,
            id="Sufficient disk space",
        ),
    ],
)
def test_warning_when_full_disk(
    tmp_path, recwarn, total_space, used_space, to_warn, expected_warning
):
    """Check when ModelConfig emits the low-disk-space ConfigWarning.

    ``shutil.disk_usage`` (as seen from ``ert.config.model_config``) is
    patched to return ``(total, used, total - used)``. When ``to_warn`` is
    true a ConfigWarning matching ``expected_warning`` must be raised;
    otherwise no ConfigWarning of any kind may be recorded.
    """
    (tmp_path / "simulations").mkdir()
    runpath = f"{tmp_path!s}/simulations/realization-%d/iter-%d"
    with patch(
        "ert.config.model_config.shutil.disk_usage",
        return_value=(total_space, used_space, total_space - used_space),
    ):
        if to_warn:
            with pytest.warns(ConfigWarning, match=expected_warning):
                _ = ModelConfig(num_realizations=1, runpath_format_string=runpath)
        else:
            _ = ModelConfig(num_realizations=1, runpath_format_string=runpath)
            for w in recwarn:
                assert not issubclass(w.category, ConfigWarning)
3 changes: 3 additions & 0 deletions tests/ert/unit_tests/gui/simulation/test_run_dialog.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def run_model():
run_model.format_error.return_value = ""
run_model.get_runtime.return_value = 1
run_model.support_restart = True
run_paths_mock = MagicMock()
run_paths_mock._runpath_format = "/"
run_model.run_paths = run_paths_mock
return run_model


Expand Down
Loading