From e86a3b68f7413f0c4c5ec005e90df66a347c86f8 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Fri, 25 Oct 2024 17:10:14 +0200 Subject: [PATCH] file_cache.py: allow read transaction on uninitialized cache (#47212) This allows the following ```python cache.init_entry("my/cache") with cache.read_transaction("my/cache") as f: data = f.read() if f is not None else None ``` mirroring `write_transaction`, which returns a tuple `(old, new)` where `old` is `None` if the cache file did not exist yet. The alternative that requires less defensive programming on the call site would be to create the "old" file upon first read, but I did not want to think about how to safely atomically create the file, and it's not unthinkable that an empty file is an invalid format (for instance the call site may expect a json file, which requires at least {} bytes). --- lib/spack/spack/test/util/file_cache.py | 10 +-- lib/spack/spack/util/file_cache.py | 88 +++++++++++++++---------- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/lib/spack/spack/test/util/file_cache.py b/lib/spack/spack/test/util/file_cache.py index cd9ce1c5b8defb..4af8a1ed0391d2 100644 --- a/lib/spack/spack/test/util/file_cache.py +++ b/lib/spack/spack/test/util/file_cache.py @@ -31,6 +31,11 @@ def test_write_and_read_cache_file(file_cache): assert text == "foobar\n" +def test_read_before_init(file_cache): + with file_cache.read_transaction("test.yaml") as stream: + assert stream is None + + @pytest.mark.not_on_windows("Locks not supported on Windows") def test_failed_write_and_read_cache_file(file_cache): """Test failing to write then attempting to read a cached file.""" @@ -46,11 +51,6 @@ def test_failed_write_and_read_cache_file(file_cache): # File does not exist assert not file_cache.init_entry("test.yaml") - # Attempting to read will cause a FileNotFoundError - with pytest.raises(FileNotFoundError, match=r"test\.yaml"): - with file_cache.read_transaction("test.yaml"): - pass - def test_write_and_remove_cache_file(file_cache): """Test two write transactions on a cached file. Then try to remove an diff --git a/lib/spack/spack/util/file_cache.py b/lib/spack/spack/util/file_cache.py index bc75704d757870..e72d431f978657 100644 --- a/lib/spack/spack/util/file_cache.py +++ b/lib/spack/spack/util/file_cache.py @@ -7,6 +7,7 @@ import math import os import shutil +from typing import IO, Optional, Tuple from llnl.util.filesystem import mkdirp, rename @@ -14,6 +15,51 @@ from spack.util.lock import Lock, ReadTransaction, WriteTransaction +def _maybe_open(path: str) -> Optional[IO[str]]: + try: + return open(path, "r") + except OSError as e: + if e.errno != errno.ENOENT: + raise + return None + + +class ReadContextManager: + def __init__(self, path: str) -> None: + self.path = path + + def __enter__(self) -> Optional[IO[str]]: + """Return a file object for the cache if it exists.""" + self.cache_file = _maybe_open(self.path) + return self.cache_file + + def __exit__(self, type, value, traceback): + if self.cache_file: + self.cache_file.close() + + +class WriteContextManager: + def __init__(self, path: str) -> None: + self.path = path + self.tmp_path = f"{self.path}.tmp" + + def __enter__(self) -> Tuple[Optional[IO[str]], IO[str]]: + """Return (old_file, new_file) file objects, where old_file is optional.""" + self.old_file = _maybe_open(self.path) + self.new_file = open(self.tmp_path, "w") + return self.old_file, self.new_file + + def __exit__(self, type, value, traceback): + if self.old_file: + self.old_file.close() + self.new_file.close() + + if value: + os.remove(self.tmp_path) + else: + rename(self.tmp_path, self.path) + + class FileCache: """This class manages cached data in the filesystem. @@ -107,7 +153,8 @@ def read_transaction(self, key): cache_file.read() """ - return ReadTransaction(self._get_lock(key), acquire=lambda: open(self.cache_path(key))) + path = self.cache_path(key) + return ReadTransaction(self._get_lock(key), acquire=lambda: ReadContextManager(path)) def write_transaction(self, key): """Get a write transaction on a file cache item. @@ -117,40 +164,11 @@ def write_transaction(self, key): moves the file into place on top of the old file atomically. """ - filename = self.cache_path(key) - if os.path.exists(filename) and not os.access(filename, os.W_OK): - raise CacheError( - "Insufficient permissions to write to file cache at {0}".format(filename) - ) - - # TODO: this nested context manager adds a lot of complexity and - # TODO: is pretty hard to reason about in llnl.util.lock. At some - # TODO: point we should just replace it with functions and simplify - # TODO: the locking code. - class WriteContextManager: - def __enter__(cm): - cm.orig_filename = self.cache_path(key) - cm.orig_file = None - if os.path.exists(cm.orig_filename): - cm.orig_file = open(cm.orig_filename, "r") - - cm.tmp_filename = self.cache_path(key) + ".tmp" - cm.tmp_file = open(cm.tmp_filename, "w") - - return cm.orig_file, cm.tmp_file - - def __exit__(cm, type, value, traceback): - if cm.orig_file: - cm.orig_file.close() - cm.tmp_file.close() - - if value: - os.remove(cm.tmp_filename) - - else: - rename(cm.tmp_filename, cm.orig_filename) - - return WriteTransaction(self._get_lock(key), acquire=WriteContextManager) + path = self.cache_path(key) + if os.path.exists(path) and not os.access(path, os.W_OK): + raise CacheError(f"Insufficient permissions to write to file cache at {path}") + + return WriteTransaction(self._get_lock(key), acquire=lambda: WriteContextManager(path)) def mtime(self, key) -> float: """Return modification time of cache file, or -inf if it does not exist.