From 34ec7e98d460e9360b1b8a34a05ee325d59f647d Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 4 Oct 2018 15:22:34 -0500 Subject: [PATCH 1/8] Cleanup of singularity images --- cleanup.py | 83 ++++++++++++++++++++++++++++++++++++++++++ cvmfs-singularity-sync | 2 + requirements.txt | 2 + 3 files changed, 87 insertions(+) create mode 100644 cleanup.py diff --git a/cleanup.py b/cleanup.py new file mode 100644 index 0000000..6fb75cb --- /dev/null +++ b/cleanup.py @@ -0,0 +1,83 @@ +""" +Cleanup for Singularity container + +Scan the images in the singularity CVMFS. If an image directory has not been "linked" to for 2 days, +remove the image directory. + +Maintains state in a file in the root singularity directory named .missing_links.json + +""" +import glob +import os +import json +import datetime +import dateutil.parser +import shutil + +json_location = "/cvmfs/singularity.opensciencegrid.org/.missing_links.json" +#json_location = "missing_links.json" + +# JSON structure: +# { +# "missing_links": { +# "/cvmfs/singularity.opensciencegrid.org/.images/7d/ba009871baa50e01d655a80f79728800401bbd0f5e7e18b5055839e713c09f": "" +# ... +# } +# } + +def cleanup(delay=2): + + # Read in the old json, if it exists + json_missing_links = {} + if os.path.exists(json_location): + with open(json_location) as json_file: + json_missing_links = json.loads(json_file.read())['missing_links'] + + # Get all the images in the repo + + # Walk the directory /cvmfs/singularity.opensciencegrid.org/.images/* + image_dirs = glob.glob("/cvmfs/singularity.opensciencegrid.org/.images/*/*") + + # Walk the named image dirs + named_image_dir = glob.glob("/cvmfs/singularity.opensciencegrid.org/*/*") + + # For named image dir, look at the what the symlink points at + for named_image in named_image_dir: + link_target = os.readlink(named_image) + # Multiple images can point to the same image_dir + if link_target not in image_dirs: + print "%s not in list of image directories from %s" % (link_target, named_image) + else: + image_dirs.remove(link_target) + + # Now, for each image, see if it's in the json + for image_dir in image_dirs: + if image_dir in json_missing_links: + image_dirs.remove(image_dir) + else: + # Add it to the json + print "Newly found missing link: %s" % (image_dir) + json_missing_links[image_dir] = str(datetime.datetime.now()) + + # Loop through the json missing links, removing directories if over the `delay` days + for image_dir, last_linked in json_missing_links.items(): + date_last_linked = dateutil.parser.parse(last_linked) + if date_last_linked < (datetime.datetime.now() - datetime.timedelta(days=delay)): + # Remove the directory + print "Removing missing link: %s" % image_dir + shutil.rmtree(image_dir) + del json_missing_links[image_dir] + + # Write out the end json + with open(json_location, 'w') as json_file: + json_file.write(json.dumps({"missing_links": json_missing_links}, default=str)) + + + +def main(): + + cleanup() + + +if __name__ == "__main__": + main() diff --git a/cvmfs-singularity-sync b/cvmfs-singularity-sync index 6794b30..64af204 100755 --- a/cvmfs-singularity-sync +++ b/cvmfs-singularity-sync @@ -43,6 +43,7 @@ import urllib2 import hashlib import tempfile import tarfile +import cleanup def main(): parser = argparse.ArgumentParser(description="Bootstrap Docker images for Singularity containers deployed to CVMFS") @@ -129,6 +130,7 @@ def main(): if retval: final_retval = retval print "All requested images have been attempted; final return code: %d" % final_retval + cleanup.cleanup() return final_retval diff --git a/requirements.txt b/requirements.txt index 805d81d..c44cd30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ docker==2.0.0 furl requests +python-dateutil + From 4c8205a123287e6c429ac587f000364c2072b545 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 4 Oct 2018 15:24:37 -0500 Subject: [PATCH 2/8] Removing whitespace --- cleanup.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cleanup.py b/cleanup.py index 6fb75cb..626d608 100644 --- a/cleanup.py +++ b/cleanup.py @@ -26,21 +26,21 @@ # } def cleanup(delay=2): - + # Read in the old json, if it exists json_missing_links = {} if os.path.exists(json_location): with open(json_location) as json_file: json_missing_links = json.loads(json_file.read())['missing_links'] - + # Get all the images in the repo - + # Walk the directory /cvmfs/singularity.opensciencegrid.org/.images/* image_dirs = glob.glob("/cvmfs/singularity.opensciencegrid.org/.images/*/*") - + # Walk the named image dirs named_image_dir = glob.glob("/cvmfs/singularity.opensciencegrid.org/*/*") - + # For named image dir, look at the what the symlink points at for named_image in named_image_dir: link_target = os.readlink(named_image) @@ -49,7 +49,7 @@ def cleanup(delay=2): print "%s not in list of image directories from %s" % (link_target, named_image) else: image_dirs.remove(link_target) - + # Now, for each image, see if it's in the json for image_dir in image_dirs: if image_dir in json_missing_links: @@ -58,7 +58,7 @@ def cleanup(delay=2): # Add it to the json print "Newly found missing link: %s" % (image_dir) json_missing_links[image_dir] = str(datetime.datetime.now()) - + # Loop through the json missing links, removing directories if over the `delay` days for image_dir, last_linked in json_missing_links.items(): date_last_linked = dateutil.parser.parse(last_linked) @@ -67,15 +67,15 @@ def cleanup(delay=2): print "Removing missing link: %s" % image_dir shutil.rmtree(image_dir) del json_missing_links[image_dir] - + # Write out the end json with open(json_location, 'w') as json_file: json_file.write(json.dumps({"missing_links": json_missing_links}, default=str)) - + def main(): - + cleanup() From dea36c3c901fb8f0402ae51d5ee92eec3de52e48 Mon Sep 17 00:00:00 2001 From: John Thiltges Date: Mon, 15 Oct 2018 12:54:22 -0500 Subject: [PATCH 3/8] Switch print() to python 3 style --- cleanup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cleanup.py b/cleanup.py index 626d608..45de952 100644 --- a/cleanup.py +++ b/cleanup.py @@ -46,7 +46,7 @@ def cleanup(delay=2): link_target = os.readlink(named_image) # Multiple images can point to the same image_dir if link_target not in image_dirs: - print "%s not in list of image directories from %s" % (link_target, named_image) + print("%s not in list of image directories from %s" % (link_target, named_image)) else: image_dirs.remove(link_target) @@ -56,7 +56,7 @@ def cleanup(delay=2): image_dirs.remove(image_dir) else: # Add it to the json - print "Newly found missing link: %s" % (image_dir) + print("Newly found missing link: %s" % (image_dir)) json_missing_links[image_dir] = str(datetime.datetime.now()) # Loop through the json missing links, removing directories if over the `delay` days @@ -64,7 +64,7 @@ def cleanup(delay=2): date_last_linked = dateutil.parser.parse(last_linked) if date_last_linked < (datetime.datetime.now() - datetime.timedelta(days=delay)): # Remove the directory - print "Removing missing link: %s" % image_dir + print("Removing missing link: %s" % image_dir) shutil.rmtree(image_dir) del json_missing_links[image_dir] From ca7c5c49fdcf14a8171ff2dc7415159f745a90ca Mon Sep 17 00:00:00 2001 From: John Thiltges Date: Mon, 15 Oct 2018 12:55:41 -0500 Subject: [PATCH 4/8] Add `--test` flag to image cleanup --- cleanup.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/cleanup.py b/cleanup.py index 45de952..aae1f62 100644 --- a/cleanup.py +++ b/cleanup.py @@ -13,6 +13,7 @@ import datetime import dateutil.parser import shutil +import argparse json_location = "/cvmfs/singularity.opensciencegrid.org/.missing_links.json" #json_location = "missing_links.json" @@ -25,8 +26,8 @@ # } # } -def cleanup(delay=2): - +def cleanup(delay=2, test=False): + '''Clean up unlinked singularity images''' # Read in the old json, if it exists json_missing_links = {} if os.path.exists(json_location): @@ -65,8 +66,9 @@ def cleanup(delay=2): if date_last_linked < (datetime.datetime.now() - datetime.timedelta(days=delay)): # Remove the directory print("Removing missing link: %s" % image_dir) - shutil.rmtree(image_dir) - del json_missing_links[image_dir] + if not test: + shutil.rmtree(image_dir) + del json_missing_links[image_dir] # Write out the end json with open(json_location, 'w') as json_file: @@ -75,9 +77,17 @@ def cleanup(delay=2): def main(): + '''Main function''' + args = parse_args() + cleanup(test=args.test) - cleanup() +def parse_args(): + '''Parse CLI options''' + parser = argparse.ArgumentParser() + parser.add_argument('--test', action='store_true', + help="Don't remove files, but go through the motions of removing them.") + return parser.parse_args() if __name__ == "__main__": main() From e03a12ac0b18434bc6082e1f5849c85ae38aa260 Mon Sep 17 00:00:00 2001 From: John Thiltges Date: Mon, 15 Oct 2018 12:57:44 -0500 Subject: [PATCH 5/8] Switch cleanup to using a const for SINGULARITY_BASE --- cleanup.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/cleanup.py b/cleanup.py index aae1f62..91da560 100644 --- a/cleanup.py +++ b/cleanup.py @@ -15,8 +15,10 @@ import shutil import argparse -json_location = "/cvmfs/singularity.opensciencegrid.org/.missing_links.json" -#json_location = "missing_links.json" +SINGULARITY_BASE = '/cvmfs/singularity.opensciencegrid.org' + +# /cvmfs/singularity.opensciencegrid.org/.missing_links.json +JSON_LOCATION = os.path.join(SINGULARITY_BASE, '.missing_links.json') # JSON structure: # { @@ -30,17 +32,20 @@ def cleanup(delay=2, test=False): '''Clean up unlinked singularity images''' # Read in the old json, if it exists json_missing_links = {} - if os.path.exists(json_location): - with open(json_location) as json_file: - json_missing_links = json.loads(json_file.read())['missing_links'] + try: + with open(JSON_LOCATION) as json_file: + json_missing_links = json.load(json_file)['missing_links'] + except (IOError, ValueError): + # File is missing, unreadable, or damaged + pass # Get all the images in the repo # Walk the directory /cvmfs/singularity.opensciencegrid.org/.images/* - image_dirs = glob.glob("/cvmfs/singularity.opensciencegrid.org/.images/*/*") + image_dirs = glob.glob(os.path.join(SINGULARITY_BASE, '.images/*/*')) # Walk the named image dirs - named_image_dir = glob.glob("/cvmfs/singularity.opensciencegrid.org/*/*") + named_image_dir = glob.glob(os.path.join(SINGULARITY_BASE, '*/*')) # For named image dir, look at the what the symlink points at for named_image in named_image_dir: @@ -71,10 +76,8 @@ def cleanup(delay=2, test=False): del json_missing_links[image_dir] # Write out the end json - with open(json_location, 'w') as json_file: - json_file.write(json.dumps({"missing_links": json_missing_links}, default=str)) - - + with open(JSON_LOCATION, 'w') as json_file: + json.dump({"missing_links": json_missing_links}, json_file) def main(): '''Main function''' From 3e70d673d74b643432fb945cacf5e0c786cba690 Mon Sep 17 00:00:00 2001 From: John Thiltges Date: Mon, 15 Oct 2018 12:59:27 -0500 Subject: [PATCH 6/8] Switch cleanup script to use datetime (default module) --- cleanup.py | 11 ++++++----- requirements.txt | 2 -- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cleanup.py b/cleanup.py index 91da560..5035139 100644 --- a/cleanup.py +++ b/cleanup.py @@ -10,10 +10,10 @@ import glob import os import json -import datetime -import dateutil.parser import shutil import argparse +import time +from datetime import datetime, timedelta SINGULARITY_BASE = '/cvmfs/singularity.opensciencegrid.org' @@ -63,12 +63,13 @@ def cleanup(delay=2, test=False): else: # Add it to the json print("Newly found missing link: %s" % (image_dir)) - json_missing_links[image_dir] = str(datetime.datetime.now()) + json_missing_links[image_dir] = int(time.time()) # Loop through the json missing links, removing directories if over the `delay` days + expiry = datetime.now() - timedelta(days=delay) for image_dir, last_linked in json_missing_links.items(): - date_last_linked = dateutil.parser.parse(last_linked) - if date_last_linked < (datetime.datetime.now() - datetime.timedelta(days=delay)): + date_last_linked = datetime.fromtimestamp(last_linked) + if date_last_linked < expiry: # Remove the directory print("Removing missing link: %s" % image_dir) if not test: diff --git a/requirements.txt b/requirements.txt index c44cd30..805d81d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,3 @@ docker==2.0.0 furl requests -python-dateutil - From 3de7b24647c383543291b55d2a1553b691af13b7 Mon Sep 17 00:00:00 2001 From: John Thiltges Date: Mon, 15 Oct 2018 13:01:16 -0500 Subject: [PATCH 7/8] Handle case where image is unlinked and restored and add safety checking before removing a directory --- cleanup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cleanup.py b/cleanup.py index 5035139..d7c50d4 100644 --- a/cleanup.py +++ b/cleanup.py @@ -50,17 +50,14 @@ def cleanup(delay=2, test=False): # For named image dir, look at the what the symlink points at for named_image in named_image_dir: link_target = os.readlink(named_image) - # Multiple images can point to the same image_dir - if link_target not in image_dirs: - print("%s not in list of image directories from %s" % (link_target, named_image)) - else: + while link_target in image_dirs: image_dirs.remove(link_target) + # Remove linked image from json (in case link is restored) + json_missing_links.pop(link_target, None) # Now, for each image, see if it's in the json for image_dir in image_dirs: - if image_dir in json_missing_links: - image_dirs.remove(image_dir) - else: + if image_dir not in json_missing_links: # Add it to the json print("Newly found missing link: %s" % (image_dir)) json_missing_links[image_dir] = int(time.time()) @@ -70,6 +67,9 @@ def cleanup(delay=2, test=False): for image_dir, last_linked in json_missing_links.items(): date_last_linked = datetime.fromtimestamp(last_linked) if date_last_linked < expiry: + # Confirm that we're inside the managed directory + if not image_dir.startswith(SINGULARITY_BASE): + continue # Remove the directory print("Removing missing link: %s" % image_dir) if not test: From a2721ec9510f43565543e666cc8472705c93a184 Mon Sep 17 00:00:00 2001 From: John Thiltges Date: Mon, 15 Oct 2018 13:09:31 -0500 Subject: [PATCH 8/8] Add shebang --- cleanup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cleanup.py b/cleanup.py index d7c50d4..9a1b811 100644 --- a/cleanup.py +++ b/cleanup.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Cleanup for Singularity container