From c66ab257e6d72a2cf64f0e33977ce661c301b208 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 28 Feb 2023 08:40:35 +0100 Subject: [PATCH] Hstouchdocupdate (#140) * refactor app help, pass1 * unify arg processing for additional cmds * clean up pyflake warnings --- h5pyd/__init__.py | 12 -- h5pyd/_apps/config.py | 301 +++++++++++++++++++++++++++--- h5pyd/_apps/hsacl.py | 209 +++++++++------------ h5pyd/_apps/hscopy.py | 412 +++++++++++++++-------------------------- h5pyd/_apps/hsdel.py | 150 +++++++-------- h5pyd/_apps/hsdiff.py | 248 +++++++++---------------- h5pyd/_apps/hsget.py | 196 ++++++-------------- h5pyd/_apps/hsinfo.py | 270 ++++++--------------------- h5pyd/_apps/hsload.py | 408 ++++++++++++---------------------------- h5pyd/_apps/hsls.py | 161 ++++++---------- h5pyd/_apps/hsmv.py | 189 ++++++------------- h5pyd/_apps/hsstat.py | 207 +++++++++++++++++++++ h5pyd/_apps/hstouch.py | 179 ++++++++---------- h5pyd/_apps/utillib.py | 10 +- setup.py | 1 + 15 files changed, 1303 insertions(+), 1650 deletions(-) create mode 100644 h5pyd/_apps/hsstat.py diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 5ce00d17..089e3ba0 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -12,16 +12,6 @@ from __future__ import absolute_import -#from . import _conv -#_conv.register_converters() - -#from . import h5a, h5d, h5ds, h5f, h5fd, h5g, h5r, h5s, h5t, h5p, h5z - -#h5s.NULL = h5s._NULL # NULL is a reserved name at the Cython layer -#h5z._register_lzf() - -#from .highlevel import * - from . import version from ._hl.base import Empty from ._hl.h5type import special_dtype, Reference, RegionReference @@ -40,11 +30,9 @@ from .config import Config -#from . import hsinfo __version__ = version.version - __doc__ = \ """ This is the h5pyd package, a Python interface to the HDF REST Server. diff --git a/h5pyd/_apps/config.py b/h5pyd/_apps/config.py index 5a3e76d9..1688a14f 100755 --- a/h5pyd/_apps/config.py +++ b/h5pyd/_apps/config.py @@ -10,14 +10,98 @@ # request a copy from help@hdfgroup.org. # ############################################################################## import os +import sys import json +import logging + +default_cfg = { + "hs_endpoint": { + "default": None, + "flags": ["-e", "--endpoint"], + "help": "server endpoint, e.g. http://hsdshdflab.hdfgroup.org", + "choices": ["ENDPOINT",] + }, + "hs_username": { + "default": None, + "flags": ["-u", "--user"], + "help": "user name credential", + "choices": ["USERNAME",] + }, + "hs_password": { + "default": None, + "flags": ["-p", "--password"], + "help": "password credential", + "choices": ["PASSWORD",] + }, + "hs_api_key": { + "default": None, + "flags": ["--api_key",], + "help": "user api key", + "choices": ["API_KEY"] + }, + "hs_bucket": { + "default": None, + "flags": ["--bucket",], + "help": "storage Bucket to use (S3 Bucket, Azure Container, or top-level directory)", + "choices": ["BUCKET",] + }, + + "loglevel": { + "default": "error", + "flags": ["--loglevel",], + "help": "logging verbosity", + "choices": ["debug", "info", "warning", "error"], + }, + "logfile": { + "default": None, + "flags": ["--logfile",], + "help": "file to send logout to (otherwise stdout)", + "choices": ["FILENAME",] + }, + "verbose": { + "default": False, + "flags": ["--verbose", "-v"], + "help": "verbose output", + }, + "ignore": { + "default": False, + "flags": ["--ignore",], + "help": "don't exit on error" + } +} + +hscmds = ("hsinfo", "hsconfigure", "hsls", "hstouch", "hsload", "hsget", "hsacl", "hsrm", "hsdiff") class Config: """ User Config state """ - def __init__(self, config_file=None, **kwargs): - self._cfg = {} + def __init__(self, config_file=None, custom_entries=[], **kwargs): + self._names = [] + self._values = {} + self._flags = {} + self._help = {} + self._choices = {} + self._flag_map = {} + + # set default entries + for defaults in (default_cfg, custom_entries): + for name in defaults: + if name in self._names: + raise ValueError(f"config {name} already set") + entry = defaults[name] + self._names.append(name) + if "default" in entry: + self._values[name] = entry["default"] + if "flags" in entry: + self._flags[name] = entry["flags"] + for flag in entry["flags"]: + self._flag_map[flag] = name + if "help" in entry: + self._help[name] = entry["help"] + if "choices" in entry: + self._choices[name] = entry["choices"] + if config_file: self._config_file = config_file elif os.path.isfile(".hscfg"): @@ -38,54 +122,215 @@ def __init__(self, config_file=None, **kwargs): continue fields = s.split('=') if len(fields) < 2: - print("config file: {} line: {} is not valid".format(self._config_file, line_number)) + print(f"config file: {self._config_file} line: {line_number} is not valid") continue k = fields[0].strip() v = fields[1].strip() - self._cfg[k] = v + if k not in self._names: + raise ValueError(f"undefined option: {name}") + if k in self._choices: + choices = self._choices[k] + if len(choices) > 1 and v not in self._choices: + raise ValueError(f"option {k} must be one of {choices}") + self._values[k] = v # override any config values with environment variable if found - for k in self._cfg.keys(): + for k in self._names: if k.upper() in os.environ: - self._cfg[k] = os.environ[k.upper()] + v = os.environ[k.upper()] + if name in self._choices: + choices = self._choices[name] + if len(choices) > 1 and v not in self._choices: + raise ValueError(f"option {name} must be one of {choices}") + self._values[name] = v # finally update any values that are passed in to the constructor - for k in kwargs.keys(): - self._cfg[k] = kwargs[k] + for name in kwargs.keys(): + if name in self._names: + v = kwargs[name] + if name in self._choices: + choices = self._choices[name] + if len(choices) > 1 and v not in self._choices: + raise ValueError(f"option {name} must be one of {choices}") + self._values[name] = kwargs[name] def __getitem__(self, name): """ Get a config item """ - if name not in self._cfg: - if name.upper() in os.environ: - self._cfg[name] = os.environ[name.upper()] - else: - return None - return self._cfg[name] - - def __setitem__(self, name, obj): - """ set config item """ - self._cfg[name] = obj + if name not in self._names: + return None + return self._values[name] - def __delitem__(self, name): - """ Delete option. """ - del self._cfg[name] + def setitem(self, name, value, flags=None, choices=None, help=None): + """ Set a config item """ + if name not in self._names: + self._names.append(name) + self._values[name] = value + if flags is not None: + self._flags[name] = flags + for flag in flags: + self._flag_map[flag] = name + if choices is not None: + self._choices[name] = choices + if help is not None: + self._help[name] = help + def __setitem__(self, name, value): + self.setitem(name, value) + def __len__(self): - return len(self._cfg) + return len(self._names) def __iter__(self): """ Iterate over config names """ - keys = self._cfg.keys() - for key in keys: - yield key + for name in self._names: + yield name def __contains__(self, name): - return name in self._cfg + return name in self._names def __repr__(self): - return json.dumps(self._cfg) + return json.dumps(self._values) def keys(self): - return self._cfg.keys() + return self._names + + def get_flags(self, name): + if name in self._flags: + return self._flags[name] + else: + return None + + def get_help(self, name): + if name in self._help: + return self._help[name] + else: + return None + + def get_see_also(self, this_cmd): + msg = "See also the commands: " + for cmd in hscmds: + if cmd != this_cmd: + msg += f"{cmd}, " + msg = msg[:-2] # remove trailing comma + return msg + + + def get_help_message(self, name): + help_text= self.get_help(name) + flags = self.get_flags(name) + choices = self.get_choices(name) + if not help_text or len(flags) == 0: + return None + + msg = flags[0] + for i in range(1, len(flags)): + msg += f", {flags[i]}" + if choices: + if len(choices) == 1: + msg += f" {choices[0]}" + else: + msg += " {" + for choice in choices: + msg += f"{choice}|" + msg = msg[:-1] + msg += "}" + if len(msg) < 40: + pad = " "*(40 - len(msg)) + msg += pad + + msg += f" {help_text}" + + return msg + + + def get_nargs(self, name): + choices = self._choices.get(name) + if not choices: + return 0 + else: + return 1 + + def get_choices(self, name): + if name in self._choices: + return self._choices[name] + else: + return 0 + + def get_names(self): + return self._names + + def set_cmd_flags(self, args, allow_post_flags=False): + """ process any command line options + return any place argument as a list + """ + options = [] + argn = 0 + while argn < len(args): + arg = args[argn] + val = None + if len(args) > argn + 1: + val = args[argn+1] + if not arg.startswith("-"): + options.append(arg) + argn += 1 + elif options: + if allow_post_flags: + options.append(arg) + argn += 1 + else: + raise ValueError("flags must be set before positional arguments") + else: + name = self._flag_map.get(arg) + if arg in ("-h", "--help"): + raise ValueError() # trigger print usage + if name not in self._names: + raise ValueError("option not found") + if not self.get_nargs(name): + # set flag + self._values[name] = True + argn += 1 + else: + if not val: + raise ValueError("option value missing") + if self._choices.get(name): + choices = self._choices.get(name) + + if choices and len(choices) > 1 and val not in self._choices.get(name): + raise ValueError(f"option value must be one of {self._choices.get(name)}") + self._values[name] = val + argn += 2 + return options + + def get_loglevel(self): + val = self._values["loglevel"] + val = val.upper() + choices = ("DEBUG", "INFO", "WARNING", "ERROR") + if val == "DEBUG": + loglevel = logging.DEBUG + elif val == "INFO": + loglevel = logging.INFO + elif val in ("WARN", "WARNING"): + loglevel = logging.WARNING + elif val == "ERROR": + loglevel = logging.ERROR + else: + raise ValueError(f"loglevel must be one of {choices}") + return loglevel + + def get_cmd(self): + """ return command argument used to invoke""" + cmd = sys.argv[0].split('/')[-1] + if cmd.endswith(".py"): + cmd = "python " + cmd + return cmd + + def print(self, msg): + if self._values.get("logfile"): + # write msg to logfile as info + logging.info(msg) + if self._values.get("verbose"): + print(msg) + + diff --git a/h5pyd/_apps/hsacl.py b/h5pyd/_apps/hsacl.py index b289146a..3fe3ec03 100755 --- a/h5pyd/_apps/hsacl.py +++ b/h5pyd/_apps/hsacl.py @@ -20,6 +20,17 @@ cfg = Config() +# +# log error and abort app +# +def abort(msg): + logging.error(msg) + if cfg["logfile"]: + # write to stderr if we are output logs to a file + sys.stderr.write(msg + "\n") + logging.error("exiting program with return code -1") + sys.exit(-1) + # # get given ACL, return None if not found # @@ -46,142 +57,105 @@ def getACL(f, username="default"): # # Usage # -def printUsage(): +def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() + print("Usage:") print("") - print("Usage: {} [options] domain [+crudep] [-crudep] [userid1 userid2 ...]".format(cfg["cmd"])) + print(f" {cmd} [options] domain [+crudep] [-crudep] [userid1 userid2 ...]") + print("") + print("Description:") + print(" Display, add, or change ACLs for a domain or folder") print("") print("Options:") - print(" -v | --verbose :: verbose output") - print(" -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org") - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print("Arguments:") - print(" domain :: Domain or Folder to be updated") - print(" +/- :: add or remove permissions") - print(" crudep :: permission flags: Create, Read, Update, Delete, rEadacl, uPdateacl") + print(" domain :: Domain or Folder to be updated") + print(" +/- :: add or remove permissions") + print(" crudep :: permission flags: Create, Read, Update, Delete, rEadacl, uPdateacl") + msg = " userid1, userid2, etc.: list of usernames, group names (group names distinguished " + msg += "by 'g:' prefix) or 'default' to set permissions for those not otherwise listed" + print(msg) + print("") + print("Examples:") + print(f" list acls: {cmd} /home/jill/myfile.h5") + print(f" list ted's acl (if any): {cmd} /home/jill/myfile.h5 ted") + print(f" add/update acl to give ted read & update permissions: {cmd} /home/jill/myfile.h5 +ru ted") + print(f" remove all permissions except read for jill: {cmd} /home/jill/myfile.h5 -cudep jill") + print(f" enable create, update, and read ACL for devs group: {cmd} /shared/datafile.h5 +cup g:devs") + print(f" enable domain and ACLs to be read by anyone: {cmd} /home/jill/myfile.h5 +re default") print("") - print("examples...") - print("list acls: {} /home/jill/myfile.h5".format(cfg["cmd"])) - print("list ted's acl (if any): {} /home/jill/myfile.h5 ted".format(cfg["cmd"])) - print("add/update acl to give ted read & update permissions: {} /home/jill/myfile.h5 +ru ted".format(cfg["cmd"])) - print("remove all permissions except read for jill: {} /home/jill/myfile.h5 -cudep jill".format(cfg["cmd"])) + print(cfg.get_see_also(cmd)) print("") sys.exit() def main(): - cfg["cmd"] = sys.argv[0].split('/')[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["verbose"] = False - perm_abvr = {'c':'create', 'r': 'read', 'u': 'update', 'd': 'delete', 'e': 'readACL', 'p':'updateACL'} fields = ('username', 'create', 'read', 'update', 'delete', 'readACL', 'updateACL') domain = None perm = None - loglevel = logging.ERROR - logfname = None usernames = [] add_list = set() remove_list = set() - if len(sys.argv) == 1 or sys.argv[1] == "-h": - printUsage() + # additional options + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") - argn = 1 - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - if len(sys.argv) > argn + 1: - val = sys.argv[argn+1] + + try: + cmdline_args = cfg.set_cmd_flags(sys.argv[1:], allow_post_flags=True) + except ValueError as ve: + print(ve) + usage() + + if len(cmdline_args) == 0: + # need a domain + usage() - if arg in ("-v", "--verbose"): - cfg["verbose"] = True - argn += 1 - elif arg == "--loglevel": - val = val.upper() - if val == "DEBUG": - loglevel = logging.DEBUG - elif val == "INFO": - loglevel = logging.INFO - elif val in ("WARN", "WARNING"): - loglevel = logging.WARNING - elif val == "ERROR": - loglevel = logging.ERROR - else: - printUsage() - argn += 2 - elif domain is None and arg == '--logfile': - logfname = val - argn += 2 - elif domain is None and arg in ("-h", "--help"): - printUsage() - elif domain is None and arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif domain is None and arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif domain is None and arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif domain is None and arg[0] in ('-', '+'): - print("No domain given") - printUsage() - elif domain is None: + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") + + for arg in cmdline_args: + if domain is None: domain = arg - if domain[0] != '/': - print("Domain must start with '/'") - printUsage() - argn += 1 elif arg[0] == '+': if len(usernames) > 0: - printUsage() + abort("no usernames given!") add_list = set(arg[1:]) - argn += 1 - elif arg[0] == '-': if len(usernames) > 0: - printUsage() + abort("remove flags must be placed before usernames!") remove_list = set(arg[1:]) - argn += 1 else: if arg.find('/') >= 0: - print("Invalid username:", arg) - printUsage() + abort(f"invalid username: {arg}") usernames.append(arg) - argn += 1 - - # setup logging - logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) - logging.debug("set log_level to {}".format(loglevel)) - logging.info("domain: {}".format(domain)) - logging.info("add_list: {}".format(add_list)) - logging.info("remove_list: {}".format(remove_list)) - logging.info("usernames: {}".format(usernames)) + logging.info(f"domain: {domain}") + logging.info(f"add_list: {add_list}") + logging.info(f"remove_list: {remove_list}") + logging.info(f"usernames: {usernames}") if len(usernames) == 0 and (add_list or remove_list): - print("At least one username must be given to add/remove permissions") - printUsage() + abort("at least one username must be given to add/remove permissions") if domain is None: - print("no domain specified") - sys.exit(1) + abort("no domain specified") conflicts = list(add_list & remove_list) if len(conflicts) > 0: - print("permission: ", conflicts[0], " permission flag set for both add and remove") - sys.exit(1) + abort(f"permission: {conflicts[0]} flag set for both add and remove") mode = 'r' if add_list or remove_list: @@ -189,14 +163,12 @@ def main(): perm = {} for x in add_list: if x not in perm_abvr: - print("Permission flag: {} is not valid - must be one of 'crudep;".format(x)) - sys.exit(1) + abort("Permission flag: {x} is not valid - must be one of 'crudep'") perm_name = perm_abvr[x] perm[perm_name] = True for x in remove_list: if x not in perm_abvr: - print("Permission flag: {} is not valid - must be one of 'crudep;".format(x)) - sys.exit(1) + abort(f"Permission flag: {x} is not valid - must be one of 'crudep'") perm_name = perm_abvr[x] perm[perm_name] = False logging.info("perm:", perm) @@ -209,14 +181,11 @@ def main(): f = h5pyd.File(domain, mode=mode, endpoint=cfg["hs_endpoint"], username=cfg["hs_username"], password=cfg["hs_password"], bucket=cfg["hs_bucket"]) except IOError as ioe: if ioe.errno in (404, 410): - print("domain not found") - sys.exit(1) + abort("domain not found") elif ioe.errno in (401, 403): - print("access is not authorized") - sys.exit(1) + abort("access is not authorized") else: - print("Unexpected error:", ioe) - sys.exit(1) + abort(f"Unexpected error: {ioe}") # update/add ACL if permission flags have been set if perm: @@ -243,7 +212,7 @@ def main(): if acl is None: acl = default_acl.copy() acl["userName"] = username - logging.info("updating acl to: {}".format(acl)) + logging.info(f"updating acl to: {acl}") # mix in any permission changes for k in perm: acl[k] = perm[k] @@ -251,10 +220,9 @@ def main(): f.putACL(acl) except IOError as ioe: if ioe.errno in (401, 403): - print("access is not authorized") + abort("access is not authorized") else: - print("Unexpected error:", ioe) - sys.exit(1) + abort("Unexpected error:", ioe) # # read the acls # @@ -264,13 +232,12 @@ def main(): acls = f.getACLs() except IOError as ioe: if ioe.errno == 403: - print("User {} does not have permission to read ACL for this domain".format(cfg["hs_username"])) - sys.exit(1) + username = cfg["hs_username"] + abort(f"User: {username} does not have permission to read ACL for this domain") elif ioe.errno == 401: - print("username/password needs to be provided") - sys.exit(1) + abort("username/password needs to be provided") else: - print("Unexpected error: {}".format(ioe)) + abort(f"Unexpected error: {ioe}") print("%015s %08s %08s %08s %08s %08s %08s " % fields) print("-"*80) for acl in acls: @@ -289,16 +256,14 @@ def main(): print("%015s %08s %08s %08s %08s %08s %08s " % vals) except IOError as ioe: if ioe.errno == 403: - print("User {} does not have permission to read ACL for this domain".format(cfg["hs_username"])) - sys.exit(1) + this_user = cfg["hs_username"] + abort(f"User {this_user} does not have permission to read ACL for this domain") elif ioe.errno == 401: - print("username/password needs to be provided") - sys.exit(1) + abort("username/password needs to be provided") elif ioe.errno == 404: - print(username, "") + abort(f"{username} not found") else: - print("Unexpected error:", ioe) - sys.exit(1) + abort(f"Unexpected error: {ioe}") f.close() diff --git a/h5pyd/_apps/hscopy.py b/h5pyd/_apps/hscopy.py index 6e64940a..05141333 100755 --- a/h5pyd/_apps/hscopy.py +++ b/h5pyd/_apps/hscopy.py @@ -12,17 +12,7 @@ import sys import logging - -try: - import h5pyd -except ImportError as e: - sys.stderr.write("ERROR : %s : install it to use this utility...\n" % str(e)) - sys.exit(1) - -try: - import pycurl as PYCRUL -except ImportError: - PYCRUL = None +import h5pyd if __name__ == "__main__": from config import Config @@ -31,198 +21,141 @@ from .config import Config from .utillib import load_file - cfg = Config() - # ---------------------------------------------------------------------------------- def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() print("Usage:\n") - print((" {} [ OPTIONS ] source destination".format(cfg["cmd"]))) + print(f" {cmd} [ OPTIONS ] SOURCE DEST") print("") print("Description:") - print(" Copy domain") - print(" source: domain to be copied ") - print(" destination: target domain") + print(" Copy HSDS domain to target") + print(" SOURCE: HSDS domain (absolute path with or without 'hdf5:// prefix)") + print(" DEST: HSDS domain or folder (path as above ending in '/')") print("") + print("Options:") - print(" -v | --verbose :: verbose output") - print( - " -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org" - ) - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print(" --src_endpoint :: The HDF Server endpoint for src file") - print(" --src_user :: User name credential for src file") - print(" --src_password :: Password credential for src file") - print(" --des_endpoint :: The HDF Server endpoint for des file") - print(" --des_user :: User name credential for des file") - print(" --des_password :: Password credential for des file") - print(" -c | --conf :: A credential and config file") - print( - " -z[n] :: apply compression filter to any non-compressed datasets, n: [0-9]" - ) - print(" -n | --no-clobber :: Do not overwrite existing domains") - print(" --cnf-eg :: Print a config file and then exit") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" --src_bucket :: Storage bucket for src file") - print(" --des_bucket :: Storage bucket for des file") - print(" --nodata :: Do not upload dataset data") - print(" --ignore :: Don't exit on error") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print("examples:") + print(f" {cmd} /myfolder/orig.h5 /myfolder/copy.h5") + print(f" {cmd} /myfolder/orig.h5 /anotherfolder/") + print(f" {cmd} -z 5 /myfolder/uncompressed.h5 /myfolder/compressed.h5") print("") + print(cfg.get_see_also(cmd)) + print("") + sys.exit() + + +def getFile(domain): + username = cfg["src_username"] + if not username: + username = cfg["hs_username"] + password = cfg["src_password"] + if not password: + password = cfg["hs_password"] + endpoint = cfg["src_endpoint"] + if not endpoint: + endpoint = cfg["hs_endpoint"] + bucket = cfg["src_bucket"] + if not bucket: + bucket = cfg["hs_bucket"] + + fh = h5pyd.File(domain, + mode='r', + endpoint=endpoint, + username=username, + password=password, + bucket=bucket) + + return fh + +def createFile(domain, linked_domain=None, no_clobber=False): + #print("createFile", domain) + username = cfg["des_username"] + if not username: + username = cfg["hs_username"] + password = cfg["des_password"] + if not password: + password = cfg["hs_password"] + endpoint = cfg["des_endpoint"] + if not endpoint: + endpoint = cfg["hs_endpoint"] + bucket = cfg["des_bucket"] + if not bucket: + bucket = cfg["hs_bucket"] + if cfg["no_clobber"]: + mode= "x" + else: + mode="w" + + fh = h5pyd.File(domain, + mode=mode, + endpoint=endpoint, + username=username, + password=password, + bucket=bucket) + + return fh # end print_usage # ---------------------------------------------------------------------------------- -def print_config_example(): - print("# default") - print("hs_username = ") - print("hs_password = ") - print("hs_endpoint = http://hsdshdflab.hdfgroup.org") +def main(): + + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite any domains") + cfg.setitem("src_endpoint", None, flags=["--src-endpoint"], choices=["ENDPOINT",], help="server endpoint for source domain") + cfg.setitem("src_username", False, flags=["--src-user"], choices=["USERNAME",], help="user name credential for source domain") + cfg.setitem("src_password", False, flags=["--src-password"], choices=["PASSWORD",], help="password credential for source domain") + cfg.setitem("src_bucket", False, flags=["--src-bucket"], choices=["BUCKET"], help="storage bucket for source domain") + cfg.setitem("des_endpoint", None, flags=["--des-endpoint"], choices=["ENDPOINT",], help="server endpoint for dest domain") + cfg.setitem("des_username", False, flags=["--des-user"], choices=["USERNAME",], help="user name credential for dest domain") + cfg.setitem("des_password", False, flags=["--des-password"], choices=["PASSWORD",], help="password credential for dest domain") + cfg.setitem("des_bucket", False, flags=["--des-bucket"], choices=["BUCKET"], help="storage bucket for dest domain") + cfg.setitem("compress", 0, flags=["-z",], choices=["LEVEL",], help="compression level from 0 (no compression) to 9 (highest)") + cfg.setitem("nodata", False, flags=["--nodata",], help="do not copy dataset data") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() -# print_config_example + if len(domains) < 2: + usage() + + src_domain = domains[0] + des_domain = domains[1] -# ---------------------------------------------------------------------------------- -def main(): + if cfg["nodata"]: + dataload = None + else: + dataload = "ingest" - loglevel = logging.ERROR - verbose = False - ignore_error = False - dataload = "ingest" compressLevel = None - no_clobber = False - cfg["cmd"] = sys.argv[0].split("/")[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["logfname"] = None - logfname = None - - src_files = [] - argn = 1 - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - - if arg[0] == "-" and len(src_files) > 0: - # options must be placed before filenames - print("options must precead source files") - usage() - sys.exit(-1) - if len(sys.argv) > argn + 1: - val = sys.argv[argn + 1] - if arg in ("-v", "--verbose"): - verbose = True - argn += 1 - elif arg == "--nodata": - dataload = None - argn += 1 - elif arg == "--loglevel": - if val == "debug": - loglevel = logging.DEBUG - elif val == "info": - loglevel = logging.INFO - elif val == "warning": - loglevel = logging.WARNING - elif val == "error": - loglevel = logging.ERROR - else: - print("unknown loglevel") - usage() - sys.exit(-1) - argn += 2 - elif arg == "--logfile": - logfname = val - argn += 2 - elif arg in ("-h", "--help"): - usage() - sys.exit(0) - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg == "--src_endpoint": - cfg["src_hs_endpoint"] = val - argn += 2 - elif arg == "--des_endpoint": - cfg["des_hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg == "--src_username": - cfg["src_hs_username"] = val - argn += 2 - elif arg == "--des_username": - cfg["des_hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg == "--src_password": - cfg["src_hs_password"] = val - argn += 2 - elif arg == "--des_password": - cfg["des_hs_password"] = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg == "--src_bucket": - cfg["src_hs_bucket"] = val - argn += 2 - elif arg == "--des_bucket": - cfg["des_hs_bucket"] = val - argn += 2 - elif arg in ("-n", "--no-clobber"): - no_clobber = True - argn += 1 - elif arg == "--cnf-eg": - print_config_example() - sys.exit(0) - elif arg.startswith("-z"): - compressLevel = 4 - if len(arg) > 2: - try: - compressLevel = int(arg[2:]) - except ValueError: - print("Compression Level must be int between 0 and 9") - sys.exit(-1) - argn += 1 - elif arg == "--ignore": - ignore_error = True - argn += 1 - elif arg[0] == "-": - print("got unknown arg:", arg) - usage() - sys.exit(-1) - else: - src_files.append(arg) - argn += 1 - + if cfg["compress"]: + try: + compressLevel = int(cfg["compress"]) + except ValueError: + msg = "Compression Level must be int between 0 and 9" + logging.error(msg) + sys.exit(msg) + # setup logging - logging.basicConfig( - filename=logfname, - format="%(levelname)s %(asctime)s %(filename)s:%(lineno)d %(message)s", - level=loglevel, - ) - logging.debug("set log_level to {}".format(loglevel)) - - # end arg parsing - logging.info("verbose: {}".format(verbose)) - - if len(src_files) < 2: - # need at least a src and destination - usage() - sys.exit(-1) - src_domain = src_files[0] - des_domain = src_files[1] + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") - logging.info("source domain: {}".format(src_domain)) - logging.info("target domain: {}".format(des_domain)) + logging.info(f"source domain: {src_domain}") + logging.info(f"target domain: {des_domain}") if src_domain.startswith("/") or src_domain.startswith("hdf5://"): logging.debug("source domain path is absolute") @@ -244,107 +177,54 @@ def main(): sys.exit(msg) if des_domain[-1] == "/": - msg = "target domain can't be a folder" + # pull out the basename of src and add it to the + # end of des_domain + fields = src_domain.split("/") + des_domain += fields[-1] + cfg.print(f"using {des_domain} for destination") + + # get a handle to input file + try: + fin = getFile(src_domain) + except IOError as ioe: + msg = f"Error opening file {src_domain}: {ioe.errno}" logging.error(msg) sys.exit(msg) - if cfg["hs_endpoint"]: - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) - elif cfg["src_hs_endpoint"] and cfg["des_hs_endpoint"]: - logging.info("src_endpoint: {}".format(cfg["src_hs_endpoint"])) - logging.info("des_endpoint: {}".format(cfg["des_hs_endpoint"])) - else: - logging.error("No endpoint given, try -h for help\n") - sys.exit(1) try: - if cfg["src_hs_username"]: - username = cfg["src_hs_username"] - else: - username = cfg["hs_username"] - if cfg["src_hs_password"]: - password = cfg["src_hs_password"] - else: - password = cfg["hs_password"] - if cfg["src_hs_endpoint"]: - endpoint = cfg["src_hs_endpoint"] - else: - endpoint = cfg["hs_endpoint"] - if cfg["src_hs_bucket"]: - bucket = cfg["src_hs_bucket"] - else: - bucket = cfg["hs_bucket"] - # get a handle to input file - try: - fin = h5pyd.File( - src_domain, - mode="r", - endpoint=endpoint, - username=username, - password=password, - bucket=bucket, - ) - except IOError as ioe: - logging.error("Error opening file {}: {}".format(src_domain, ioe)) - sys.exit(1) - - # create the output domain - if cfg["des_hs_username"]: - username = cfg["des_hs_username"] - else: - username = cfg["hs_username"] - if cfg["des_hs_password"]: - password = cfg["des_hs_password"] - else: - password = cfg["hs_password"] - if cfg["des_hs_endpoint"]: - endpoint = cfg["des_hs_endpoint"] + fout = createFile(des_domain) + except IOError as ioe: + if ioe.errno == 403: + msg = f"No write access to domain: {des_domain}" + elif ioe.errno == 409 and cfg["no_clobber"]: + msg = f"DEST domain: {des_domain} exists, aborting copy" else: - endpoint = cfg["hs_endpoint"] - if cfg["des_hs_bucket"]: - bucket = cfg["des_hs_bucket"] - else: - bucket = cfg["hs_bucket"] - try: - if no_clobber: - mode = "x" - else: - mode = "w" - fout = h5pyd.File( - des_domain, - mode=mode, - endpoint=endpoint, - username=username, - password=password, - bucket=bucket, - ) - except IOError as ioe: - if ioe.errno == 403: - logging.error("No write access to domain: {}".format(des_domain)) - else: - logging.error("Error creating file {}: {}".format(des_domain, ioe)) - sys.exit(1) - - if compressLevel is not None: - compress_filter = "deflate" # TBD - add option for other compressors - else: - compress_filter = None + msg = f"Error creating file {des_domain}: {ioe.errno}" + logging.error(msg) + sys.exit(msg) + if compressLevel: + compress_filter = "deflate" # TBD - add option for other compressors + else: + compress_filter = None + + print("load_file, compressLevel:", compressLevel) + + try: # do the actual load load_file( fin, fout, - verbose=verbose, - ignore_error=ignore_error, + verbose=cfg["verbose"], + ignore_error=cfg["ignore"], dataload=dataload, compression=compress_filter, compression_opts=compressLevel, ) - msg = "File {} uploaded to domain: {}".format(src_domain, des_domain) - logging.info(msg) - if verbose: - print(msg) + msg = f"File {src_domain} uploaded to domain: {des_domain}" + cfg.print(msg) except KeyboardInterrupt: logging.error("Aborted by user via keyboard interrupt.") diff --git a/h5pyd/_apps/hsdel.py b/h5pyd/_apps/hsdel.py index 1d0ec083..1a3b5352 100644 --- a/h5pyd/_apps/hsdel.py +++ b/h5pyd/_apps/hsdel.py @@ -18,8 +18,14 @@ def getFolder(domain, mode='r'): password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] - dir = h5py.Folder(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket) - return dir + folder = h5py.Folder(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket) + return folder + +def exitUnlessIgnore(msg): + if cfg["ignore"]: + return + sys.exit(msg) + def deleteDomain(domain): @@ -42,18 +48,28 @@ def deleteDomain(domain): hparent = getFolder(parent_domain, mode='a') except IOError as oe: if oe.errno == 404: # Not Found - sys.exit("Parent domain: {} not found".format(parent_domain)) + msg = f"Parent domain: {parent_domain} not found" + logging.error(msg) + exitUnlessIgnore(msg) elif oe.errno == 401: # Unauthorized - sys.exit("Authorization failure") + msg = f"Authorization failure opening {parent_domain}" + logging.error(msg) + exitUnlessIgnore(msg) elif oe.errno == 403: # Forbidden - sys.exit("Not allowed") + msg = f"Not allowed to open: {parent_domain}" + logging.error(msg) + exitUnlessIgnore(msg) else: - sys.exit("Unexpected error: {}".format(oe)) + msg = f"Unexpected error: {oe}" + logging.error(msg) + exitUnlessIgnore(msg) if base_name not in hparent: # note - this may happen if the domain was recently created and not # yet synced to S3 - sys.exit("domain: {} not found".format(domain)) + msg = f"domain: {domain} not found" + logging.error(msg) + exitUnlessIgnore(msg) # delete the domain try: @@ -61,94 +77,82 @@ def deleteDomain(domain): except IOError as oe: if oe.errno == 404: # Not Found # should have caught this in the base_name check... - sys.exit("domain: {} not found".format(parent_domain)) + msg = f"domain {parent_domain} not found" + logging.error(msg) + exitUnlessIgnore(msg) elif oe.errno == 401: # Unauthorized - sys.exit("Authorization failure") + msg = "Authorization failure" + logging.error(msg) + exitUnlessIgnore(msg) elif oe.errno == 403: # Forbidden - sys.exit("Not allowed") + msg = "Not Allowed" + logging.error(msg) + exitUnlessIgnore(msg) elif oe.errno == 409 and domain.endswith('/'): # Conflict - sys.exit("folder has sub-items") + msg = "folder has sub-items" + logging.error(msg) + exitUnlessIgnore(msg) else: - sys.exit("Unexpected error: {}".format(oe)) + msg = f"Unexpected error: {oe}" + logging.error(msg) + exitUnlessIgnore(msg) if cfg["verbose"]: if domain.endswith('/'): - print("Folder: {} deleted".format(domain)) + msg = f"Folder: {domain} deleted" else: - print("Domain: {} deleted".format(domain)) + msg = f"Domain: {domain} deleted" + cfg.print(msg) + # # Usage # -def printUsage(): - print("usage: {} [-v] [-e endpoint] [-u username] [-p password] [--loglevel debug|info|warning|error] [--logfile ] [--bucket ] domains".format(cfg["cmd"])) - print("example: {} -e http://hsdshdflab.hdfgroup.org /hdfgroup/data/test/deleteme.h5".format(cfg["cmd"])) +def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() + print("Usage:\n") + print(f" {cmd} [ OPTIONS ] target") + print("") + print("Description:") + print(" Delete given domains") + print(" target: one or more domains to be deleted") + print("") + print("Options:") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print("Examples:") + print(f" {cmd} /home/myfolder/file1.h5 /home/myfolder/file2.h5") + print(f" {cmd} hdf5://home/myfolder/file2.h5 hdf5://home/afolder/") + print("") + print(cfg.get_see_also(cmd)) + print("") sys.exit() # # Main # def main(): - domains = [] - argn = 1 - loglevel = logging.ERROR - logfname=None - cfg["cmd"] = sys.argv[0].split('/')[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["verbose"] = False - - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - if len(sys.argv) > argn + 1: - val = sys.argv[argn+1] - - if arg in ("-h", "--help"): - printUsage() - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg in ("-v", "--verbose"): - cfg["verbose"] = True - argn += 1 - elif arg == "--loglevel": - val = val.upper() - if val == "DEBUG": - loglevel = logging.DEBUG - elif val == "INFO": - loglevel = logging.INFO - elif val in ("WARN", "WARNING"): - loglevel = logging.WARNING - elif val == "ERROR": - loglevel = logging.ERROR - else: - printUsage() - argn += 2 - elif arg == '--logfile': - logfname = val - argn += 2 - elif arg[0] == '-': - printUsage() - else: - domains.append(arg) - argn += 1 + + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") + + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() if len(domains) == 0: # need a domain - printUsage() - + usage() + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) - logging.debug("set log_level to {}".format(loglevel)) + logging.debug(f"set log_level to {loglevel}") for domain in domains: deleteDomain(domain) diff --git a/h5pyd/_apps/hsdiff.py b/h5pyd/_apps/hsdiff.py index c27727aa..7f8a34dc 100755 --- a/h5pyd/_apps/hsdiff.py +++ b/h5pyd/_apps/hsdiff.py @@ -18,7 +18,7 @@ import h5py import h5pyd except ImportError as e: - sys.stderr.write("ERROR : %s : install it to use this utility...\n" % str(e)) + sys.stderr.write(f"ERROR : {str(e)} : install it to use this utility...\n") sys.exit(1) try: @@ -35,6 +35,15 @@ cfg = Config() +def getFile(domain, mode="r"): + username = cfg["hs_username"] + password = cfg["hs_password"] + endpoint = cfg["hs_endpoint"] + bucket = cfg["hs_bucket"] + fh = h5pyd.File(domain, mode=mode, endpoint=endpoint, username=username, + password=password, bucket=bucket, use_cache=True) + return fh + def diff_attrs(src, tgt, ctx): """compare attributes of src and tgt""" @@ -438,32 +447,29 @@ def object_diff_helper(name, obj): # ---------------------------------------------------------------------------------- def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() print("Usage:\n") - print((" {} [ OPTIONS ] file domain".format(cfg["cmd"]))) + print(f" {cmd} [ OPTIONS ] hdf5_file domain") print("") print("Description:") - print(" Diff HDF5 file with domain") - print(" file: HDF5 file ") + print(" Compare an HDF5 file to a domain") + print(" hdf5_file: hdf5_file") print(" domain: domain") print("") print("Options:") - print(" -v | --verbose :: verbose output") - print( - " -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org" - ) - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print(" -c | --conf :: A credential and config file") - print(" --cnf-eg :: Print a config file and then exit") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" --nodata :: Do not compare dataset data") - print(" --noattr :: Do not compare attributes") - print(" --quiet :: Do not produce output") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") print("") - + print("Examples:") + print(f" {cmd} myfile.h5 /home/myfolder/myfile.h5") + print(f" {cmd} s3://myybucket/myfile.h5 /home/myfolder/myfile.h5") + print("") + print(cfg.get_see_also(cmd)) + print("") + sys.exit(1) # end print_usage @@ -481,125 +487,45 @@ def print_config_example(): # ---------------------------------------------------------------------------------- def main(): - loglevel = logging.ERROR - verbose = False - nodata = False - noattr = False - quiet = False - cfg["cmd"] = sys.argv[0].split("/")[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["logfname"] = None - logfname = None - rc = 0 - s3 = None # s3fs instance - - src_files = [] - argn = 1 - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - - if arg[0] == "-" and len(src_files) > 0: - # options must be placed before filenames - print("options must precead source files") - usage() - sys.exit(-1) - if len(sys.argv) > argn + 1: - val = sys.argv[argn + 1] - if arg in ("-v", "--verbose"): - verbose = True - argn += 1 - elif arg == "--nodata": - nodata = True - argn += 1 - elif arg == "--noattr": - noattr = True - argn += 1 - elif arg in ("-q", "--quiet"): - quiet = True - argn += 1 - elif arg == "--loglevel": - if val == "debug": - loglevel = logging.DEBUG - elif val == "info": - loglevel = logging.INFO - elif val == "warning": - loglevel = logging.WARNING - elif val == "error": - loglevel = logging.ERROR - else: - print("unknown loglevel") - usage() - sys.exit(-1) - argn += 2 - elif arg == "--logfile": - logfname = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg in ("-h", "--help"): - usage() - sys.exit(0) - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg == "--cnf-eg": - print_config_example() - sys.exit(0) - elif arg[0] == "-": - usage() - sys.exit(-1) - else: - src_files.append(arg) - argn += 1 + cfg.setitem("nodata", False, flags=["--nodata",], help="do not compare dataset data") + cfg.setitem("noattr", False, flags=["--noattr",], help="do not compare attributes") + cfg.setitem("quiet", False, flags=["--quiet",], help="surpress normal output") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") - # setup logging - logging.basicConfig( - filename=logfname, - format="%(levelname)s %(asctime)s %(filename)s:%(lineno)d %(message)s", - level=loglevel, - ) - logging.debug("set log_level to {}".format(loglevel)) - - # end arg parsing - logging.info("username: {}".format(cfg["hs_username"])) - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) - logging.info("verbose: {}".format(verbose)) - - if len(src_files) < 2: - # need at least a src and destination + try: + args = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) usage() - sys.exit(-1) - file_path = src_files[0] - domain_path = src_files[1] - - logging.info("file: {}".format(file_path)) - logging.info("domain: {}".format(domain_path)) - if domain_path.startswith("/") or domain_path.startswith("hdf5://"): - logging.debug("domain path is absolute") - else: - msg = "domain must be an absolute path" - logging.error(msg) + + if cfg["quiet"] and cfg["verbose"]: + msg = "--quiet and --verbose options can't be used together" sys.exit(msg) + if len(args) < 2: + # need at least source and target + usage() + file_path = args[0] + domain_path = args[1] + + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") + + + rc = 0 + s3 = None # s3fs instance + + cfg.print(f"file: {file_path}") + cfg.print(f"domain: {domain_path}") + if domain_path[-1] == "/": msg = "domain can't be a folder" logging.error(msg) sys.exit(msg) - if cfg["hs_endpoint"] is None: - logging.error("No endpoint given, try -h for help\n") - sys.exit(1) - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) - try: # get a handle to input file @@ -613,50 +539,46 @@ def main(): try: fin = h5py.File(s3.open(file_path, "rb"), mode="r") except IOError as ioe: - logging.error("Error opening file {}: {}".format(file_path, ioe)) - sys.exit(1) + msg = f"Error opening file {file_path}: {ioe}" + logging.error(msg) + sys.exit(msg) else: # regular h5py open try: fin = h5py.File(file_path, mode="r") except IOError as ioe: - logging.error("Error opening file {}: {}".format(domain_path, ioe)) - sys.exit(1) + msg = f"Error opening file {domain_path}: {ioe}" + logging.error(msg) + sys.exit(msg) # get the domain try: - username = cfg["hs_username"] - password = cfg["hs_password"] - endpoint = cfg["hs_endpoint"] - bucket = cfg["hs_bucket"] - fout = h5pyd.File( - domain_path, - "r", - endpoint=endpoint, - username=username, - password=password, - bucket=bucket, - ) + fout = getFile(domain_path) except IOError as ioe: if ioe.errno == 404: - logging.error("domain: {} not found".format(domain_path)) - if ioe.errno == 403: - logging.error("No read access to domain: {}".format(domain_path)) + msg = f"domain: {domain_path} not found" + logging.error(msg) + elif ioe.errno == 403: + msg = f"No read access to domain: {domain_path}" + logging.error(msg) else: - logging.error("Error opening file {}: {}".format(domain_path, ioe)) - sys.exit(1) - - # do the actual load - if quiet: - verbose = False - rc = diff_file( - fin, fout, verbose=verbose, nodata=nodata, noattr=noattr, quiet=quiet - ) - - if not quiet and rc > 0: - print("{} differences found".format(rc)) - - logging.info("diff_file done") + msg = f"Error opening file: {domain_path}: {ioe}" + logging.error(msg) + sys.exit(msg) + + # do the actual diff + kwargs = {} + kwargs["verbose"] = cfg["verbose"] + kwargs["nodata"] = cfg["nodata"] + kwargs["noattr"] = cfg["noattr"] + kwargs["quiet"] = cfg["quiet"] + rc = diff_file(fin, fout, **kwargs) + + + if not cfg["quiet"] and rc > 0: + print(f"{rc} differences found") + + cfg.print(f"diff done for {file_path}") except KeyboardInterrupt: logging.error("Aborted by user via keyboard interrupt.") diff --git a/h5pyd/_apps/hsget.py b/h5pyd/_apps/hsget.py index d78dc250..2b4ce420 100755 --- a/h5pyd/_apps/hsget.py +++ b/h5pyd/_apps/hsget.py @@ -31,179 +31,101 @@ #---------------------------------------------------------------------------------- def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() print("Usage:\n") - print((" {} [ OPTIONS ] domain filepath".format(cfg["cmd"]))) + print(f" {cmd} [ OPTIONS ] domain [ filepath ]") print("") print("Description:") print(" Copy server domain to local HDF5 file") - print(" domain: HDF Server domain (Unix or DNS style)") + print(" domain: domain to be copied") print(" filepath: HDF5 file to be created ") print("") print("Options:") - print(" -v | --verbose :: verbose output") - print(" -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org") - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print(" -c | --conf :: A credential and config file") - print(" --cnf-eg :: Print a config file and then exit") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" --nodata :: Do not download dataset data") - print(" --ignore :: Don't exit on error") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") print("") + print("Examples:") + print(f" {cmd} /shared/tall.h5 tall.h5") + print(f" {cmd} hdf5://shared/tall.h5 tall.h5") + print(f" {cmd} hdf5://shared/tall.h5 # creates local file 'tall.h5'") + print("") + print(cfg.get_see_also(cmd)) + print("") + sys.exit() #end print_usage -#---------------------------------------------------------------------------------- -def print_config_example(): - print("# default") - print("hs_username = ") - print("hs_password = ") - print("hs_endpoint = http://hsdshdflab.hdfgroup.org") -#print_config_example #---------------------------------------------------------------------------------- def main(): + + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite target") + cfg.setitem("nodata", False, flags=["--nodata",], help="do not copy dataset data") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") - loglevel = logging.ERROR - verbose = False - ignore_error = False - dataload = "ingest" # or None - - cfg["cmd"] = sys.argv[0].split('/')[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["verbose"] = False - - endpoint=cfg["hs_endpoint"] - username=cfg["hs_username"] - password=cfg["hs_password"] - bucket = cfg["hs_bucket"] - logfname=None - - des_file = None - src_domain = None - argn = 1 - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - - if arg[0] == '-' and src_domain is not None: - # options must be placed before filenames - print("options must precead source files") - usage() - sys.exit(-1) - if len(sys.argv) > argn + 1: - val = sys.argv[argn+1] - if arg in ("-v", "--verbose"): - verbose = True - argn += 1 - elif arg == "--nodata": - dataload = None - argn += 1 - elif arg == "--loglevel": - if val == "debug": - loglevel = logging.DEBUG - elif val == "info": - loglevel = logging.INFO - elif val == "warning": - loglevel = logging.WARNING - elif val == "error": - loglevel = logging.ERROR - else: - print("unknown loglevel") - usage() - sys.exit(-1) - argn += 2 - elif arg == '--logfile': - logfname = val - argn += 2 - elif arg in ("-b", "--bucket"): - bucket = val - argn += 2 - elif arg in ("-h", "--help"): - usage() - sys.exit(0) - elif arg in ("-e", "--endpoint"): - endpoint = val - argn += 2 - elif arg in ("-u", "--username"): - username = val - argn += 2 - elif arg in ("-p", "--password"): - password = val - argn += 2 - elif arg == '--cnf-eg': - print_config_example() - sys.exit(0) - elif arg == "--ignore": - ignore_error = True - elif arg[0] == '-': - usage() - sys.exit(-1) - elif src_domain is None: - src_domain = arg - argn += 1 - elif des_file is None: - des_file = arg - argn += 1 - else: - usage() - sys.exit(-1) - - # setup logging - logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) - logging.debug("set log_level to {}".format(loglevel)) - - # end arg parsing - logging.info("username: {}".format(username)) - logging.info("password: {}".format(password)) - logging.info("endpoint: {}".format(endpoint)) - logging.info("verbose: {}".format(verbose)) + try: + cmdline_values = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() - if src_domain is None or des_file is None: - # need at least a src and destination + if len(cmdline_values) < 1: usage() - sys.exit(-1) - logging.info("source domain: {}".format(src_domain)) - logging.info("target file: {}".format(des_file)) + src_domain = cmdline_values[0] + if len(cmdline_values) > 1: + des_file = cmdline_values[1] + else: + # use domain base name as file + parts = src_domain.split('/') + des_file = parts[-1] + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") - if endpoint is None: - logging.error('No endpoint given, try -h for help\n') - sys.exit(1) - logging.info("endpoint: {}".format(endpoint)) + logging.info(f"source domain: {src_domain}") + logging.info(f"target file: {des_file}") # get a handle to input domain + kwargs = {} + kwargs["endpoint"] = cfg["endpoint"] + kwargs["username"] = cfg["username"] + kwargs["password"] = cfg["password"] + kwargs["bucket"] = cfg["bucket"] try: - fin = h5pyd.File(src_domain, mode='r', endpoint=endpoint, username=username, password=password, bucket=bucket, use_cache=True) + fin = h5pyd.File(src_domain, mode='r', **kwargs) except IOError as ioe: if ioe.errno == 403: - logging.error("No read access to domain: {}".format(src_domain)) + logging.error(f"No read access to domain: {src_domain}") elif ioe.errno == 404: - logging.error("Domain: {} not found".format(src_domain)) + logging.error(f"Domain: {src_domain} not found") elif ioe.errno == 410: - logging.error("Domain: {} has been recently deleted".format(src_domain)) + logging.error(f"Domain: {src_domain} has been recently deleted") else: - logging.error("Error opening domain {}: {}".format(src_domain, ioe)) + logging.error(f"Error opening domain {src_domain}: {ioe}") sys.exit(1) # create the output HDF5 file + mode = "x" if cfg["no_clobber"] else "w" try: - fout = h5py.File(des_file, 'w') + fout = h5py.File(des_file, mode) except IOError as ioe: - logging.error("Error creating file {}: {}".format(des_file, ioe)) + logging.error(f"Error creating file {des_file}: {ioe}") sys.exit(1) try: - load_file(fin, fout, verbose=verbose, ignore_error=ignore_error, dataload=dataload) - msg = "Domain {} downloaded to file: {}".format(src_domain, des_file) - logging.info(msg) - if verbose: - print(msg) + kwargs = {} + kwargs["verbose"] = cfg["verbose"] + kwargs["ignore_error"] = cfg["ignore_error"] + kwargs["dataload"] = None if cfg["nodata"] else "ingest" + load_file(fin, fout, **kwargs) + cfg.print(f"Domain {src_domain} downloaded to file: {des_file}") except KeyboardInterrupt: logging.error('Aborted by user via keyboard interrupt.') sys.exit(1) diff --git a/h5pyd/_apps/hsinfo.py b/h5pyd/_apps/hsinfo.py index fdb84aaf..7a65c83b 100644 --- a/h5pyd/_apps/hsinfo.py +++ b/h5pyd/_apps/hsinfo.py @@ -1,7 +1,18 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # +# Utilities. The full HSDS copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## + import sys import logging import time -from datetime import datetime import h5pyd if __name__ == "__main__": @@ -10,49 +21,37 @@ from .config import Config # -# Print objects in a domain in the style of the hsls utilitiy +# Get server status info # - cfg = Config() - # # Usage # -def printUsage(): - print( - "Usage: {} [-h] [--loglevel debug|info|warning|error] [--logfile ] [-c oonf_file] [-e endpoint] [-u username] [-p password] [-b bucket] [domain]".format( - cfg["cmd"] - ) - ) +def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() + print("Usage:\n") + print(f" {cmd} [ OPTIONS ]") print("") print("Description:") - print( - " Get status information from server, or domain stats if domain is provided" - ) + print(" Get status information from server") print("") + print("Options:") - print( - " -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org" - ) - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print( - " -b | --bucket :: bucket name (for use when domain is provided)" - ) - print(" -c | --conf :: A credential and config file") - print( - " -H | --human-readable :: with -v, print human readable sizes (e.g. 123M)" - ) - print(" --rescan :: refresh domain stats (for use when domain is provided)") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print("examples:") + print(f" {cmd} -e http://hsdshdflab.hdfgroup.org") + print(f" {cmd} -e http://hsdshdflab.hdfgroup.org /shared/tall.h5") + print(cfg.get_see_also(cmd)) + print("") sys.exit() - # # # @@ -77,24 +76,6 @@ def getUpTime(start_time): return ret_str -def format_size(n): - if n is None or n == " ": - return " " * 8 - symbol = " " - if not cfg["human_readable"]: - return str(n) - # convert to common storage unit - for s in ("B", "K", "M", "G", "T"): - if n < 1024: - symbol = s - break - n /= 1024 - if symbol == "B": - return "{:}B".format(n) - else: - return "{:.1f}{}".format(n, symbol) - - def getServerInfo(cfg): """get server state and print""" username = cfg["hs_username"] @@ -107,7 +88,7 @@ def getServerInfo(cfg): print("server name: {}".format(info["name"])) if "state" in info: print("server state: {}".format(info["state"])) - print("endpoint: {}".format(endpoint)) + print(f"endpoint: {endpoint}") if "isadmin" in info and info["isadmin"]: admin_tag = "(admin)" else: @@ -119,11 +100,9 @@ def getServerInfo(cfg): try: home_folder = getHomeFolder() if home_folder: - print("home: {}".format(home_folder)) + print(f"home: {home_folder}") except IOError: - print( - "home: NO ACCESS", - ) + print("home: NO ACCESS") if "hsds_version" in info: print("server version: {}".format(info["hsds_version"])) @@ -133,109 +112,18 @@ def getServerInfo(cfg): print("server version: {}".format(info["h5serv_version"])) if "start_time" in info: uptime = getUpTime(info["start_time"]) - print("up: {}".format(uptime)) + print(f"up: {uptime}") print("h5pyd version: {}".format(h5pyd.version.version)) except IOError as ioe: if ioe.errno == 401: if username and password: - print("username/password not valid for username: {}".format(username)) + print(f"username/password not valid for username: {username}") else: # authentication error with openid or app token print("authentication failure") else: - print("Error: {}".format(ioe)) - - -def getDomainInfo(domain, cfg): - """get info about the domain and print""" - username = cfg["hs_username"] - password = cfg["hs_password"] - endpoint = cfg["hs_endpoint"] - bucket = cfg["hs_bucket"] - if "rescan" in cfg and cfg["rescan"]: - mode = "r+" # need write intent - else: - mode = "r" - - if domain.endswith("/"): - is_folder = True - else: - is_folder = False - - try: - if is_folder: - f = h5pyd.Folder( - domain, - mode=mode, - endpoint=endpoint, - username=username, - password=password, - bucket=bucket, - use_cache=True, - ) - else: - f = h5pyd.File( - domain, - mode=mode, - endpoint=endpoint, - username=username, - password=password, - bucket=bucket, - use_cache=False, - ) - except IOError as oe: - if oe.errno in (404, 410): # Not Found - sys.exit("domain: {} not found".format(domain)) - elif oe.errno == 401: # Unauthorized - sys.exit("Authorization failure") - elif oe.errno == 403: # Forbidden - sys.exit("Not allowed") - else: - sys.exit("Unexpected error: {}".format(oe)) - - timestamp = datetime.fromtimestamp(int(f.modified)) - if not is_folder and f.last_scan: - last_scan = datetime.fromtimestamp(int(f.last_scan)) - else: - last_scan = None - - if is_folder: - print("folder: {}".format(domain)) - print(" owner: {}".format(f.owner)) - print(" last modified: {}".format(timestamp)) - else: - if "rescan" in cfg and cfg["rescan"]: - f.run_scan() - - # report HDF objects (groups, datasets, and named datatypes) vs. allocated chunks - num_objects = f.num_groups + f.num_datatypes + f.num_datasets - if f.num_chunks > 0: - num_chunks = f.num_chunks - else: - # older storeinfo format doesn't have num_chunks, so calculate - num_chunks = f.num_objects - num_objects - - print("domain: {}".format(domain)) - print(" owner: {}".format(f.owner)) - print(" id: {}".format(f.id.id)) - print(" last modified: {}".format(timestamp)) - if last_scan: - print(" last scan: {}".format(last_scan)) - if f.md5_sum: - print(" md5 sum: {}".format(f.md5_sum)) - print(" total_size: {}".format(format_size(f.total_size))) - print(" allocated_bytes: {}".format(format_size(f.allocated_bytes))) - if f.metadata_bytes: - print(" metadata_bytes: {}".format(format_size(f.metadata_bytes))) - if f.linked_bytes: - print(" linked_bytes: {}".format(format_size(f.linked_bytes))) - print(" num objects: {}".format(num_objects)) - print(" num chunks: {}".format(num_chunks)) - if f.num_linked_chunks: - print(" linked chunks: {}".format(f.num_linked_chunks)) - - f.close() + print(f"Error: {ioe}") # @@ -281,79 +169,31 @@ def getHomeFolder(): # Main # def main(): - argn = 1 - cfg["cmd"] = sys.argv[0].split("/")[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["loglevel"] = logging.ERROR - cfg["logfname"] = None - cfg["human_readable"] = False - domains = [] - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - if len(sys.argv) > argn + 1: - val = sys.argv[argn + 1] - if arg == "--loglevel": - val = val.upper() - if val == "DEBUG": - cfg["loglevel"] = logging.DEBUG - elif val == "INFO": - cfg["loglevel"] = logging.INFO - elif val in ("WARN", "WARNING"): - cfg["loglevel"] = logging.WARNING - elif val == "ERROR": - cfg["loglevel"] = logging.ERROR - else: - printUsage() - argn += 2 - elif arg == "--logfile": - cfg["logfname"] = val - argn += 2 - elif arg in ("-h", "--help"): - printUsage() - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg == "--rescan": - cfg["rescan"] = True - argn += 1 - elif arg == "-H": - cfg["human_readable"] = True - argn += 1 - else: - domains.append(arg) - argn += 1 + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") - # setup logging + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() - logging.basicConfig( - filename=cfg["logfname"], - format="%(levelname)s %(asctime)s %(message)s", - level=cfg["loglevel"], - ) - logging.debug("set log_level to {}".format(cfg["loglevel"])) + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") - endpoint = cfg["hs_endpoint"] - if not endpoint or endpoint[-1] == "/" or endpoint[:4] not in ("http", "loca"): - print("WARNING: endpoint: {} doesn't appear to be valid".format(endpoint)) + if domains: + sys.exit("Use the hsstat command to get information about about a folder or domain ") - if not domains: - getServerInfo(cfg) - else: - for domain in domains: - getDomainInfo(domain, cfg) + if not cfg["hs_endpoint"]: + logging.error("endpoint not set") + usage() + + getServerInfo(cfg) + if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hsload.py b/h5pyd/_apps/hsload.py index 21298b9e..be69a630 100755 --- a/h5pyd/_apps/hsload.py +++ b/h5pyd/_apps/hsload.py @@ -36,302 +36,132 @@ from .config import Config from .utillib import load_file -if sys.version_info >= (3, 0): - from urllib.parse import urlparse -else: - from urlparse import urlparse +from urllib.parse import urlparse cfg = Config() +# ---------------------------------------------------------------------------------- +def abort(msg): + logging.error(msg) + if cfg["logfile"]: + sys.stderr.write(msg + "\n") + logging.error("exiting program with return code -1") + sys.exit(-1) # ---------------------------------------------------------------------------------- def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() print("Usage:\n") - print((" {} [ OPTIONS ] sourcefile domain".format(cfg["cmd"]))) - print((" {} [ OPTIONS ] sourcefile folder".format(cfg["cmd"]))) + print(f" {cmd} [ OPTIONS ] sourcefile domain") + print(f" {cmd} [ OPTIONS ] sourcefile_1, sourcefile_2,... folder") print("") print("Description:") - print(" Copy HDF5 file to Domain or multiple files to a Domain folder") + print(" Copy HDF5 file to domain or multiple files to a domain folder") print(" sourcefile: HDF5 file to be copied ") - print(" domain: HDF Server domain (Unix or DNS style)") - print(" folder: HDF Server folder (Unix style ending in '/')") + print(" domain: HSDS domain (absolute path with or without hdf5:// prefix)") + print(" folder: HSDS folder (path as above ending in '/')") print("") print("Options:") - print(" -v | --verbose :: verbose output") - print( - " -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org" - ) - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print( - " -a | --append :: Flag to append to an existing HDF Server domain" - ) - print( "-n | --no-clobber :: Do not overwrite existing domains (or existing datasets/groups in -a mode) ") - print(" --extend :: extend along the given dimension scale") - print(" --extend-offset :: write data at index n along extended dimension") - print(" -c | --conf :: A credential and config file") - print( - " -z[n] :: apply compression filter to any non-compressed datasets, n: [0-9]" - ) - print( - " --compression blosclz|lz4|lz4hc|snappy|gzip|zstd :: use the given compression algorithm for -z option (lz4 is default)" - ) - print(" --cnf-eg :: Print a config file and then exit") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" --nodata :: Do not upload dataset data") - print(" --link :: Link to dataset data (sourcefile given as /) or s3uri") - print(" --linkpath :: Use the given path for the link references rather than the src path") - print(" --retries :: Set number of server retry attempts") - print(" --ignore :: Don't exit on error") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") print("") print("Note about --link option:") - print( - " --link enables just the source HDF5 metadata to be ingested while the dataset data" - ) + print(" --link enables just the source HDF5 metadata to be ingested while the dataset data") print(" is left in the original file and fetched as needed.") - print( - " When used with files stored in AWS S3, the source file can be specified using the S3" - ) - print( - " path: 's3:///'. Preferably, the bucket should be in the same" - ) + print(" When used with files stored in AWS S3, the source file can be specified using the S3") + print(" path: 's3:///'. Preferably, the bucket should be in the same") print(" region as the HSDS service") - print( - " For Posix or Azure deployments, the source file needs to be copied to a regular file" - ) - print( - " system and hsload run from a directory that mirrors the bucket layout. E.g. if" - ) - print( - " consider a Posix deployment where the ROOT_DIR is '/mnt/data' and the HSDS default" - ) - print( - " bucket is 'hsdsdata' (so ingested data will be stored in '/mnt/data/hsdsdata'), the" - ) - print( - " source HDF5 files could be stored in '/mnt/data/hdf5/' and the file 'myhdf5.h5'" - ) - print(" would be imported as: 'hsload --link data/hdf5/myhdf5.h5 '") + print(" For Posix or Azure deployments, the source file needs to be copied to a regular file,") + print(" and the --linkpath option should be used to specifiy the Azure container name and path, or ") + print(" (for HSDS deployed with POSIX) the file path relative to the server ROOT_DIR") print("") - print( - " Also, the --link option requires hdf5lib 1.10.6 or higher and h5py 2.10 or higher." - ) - print( - " The docker image: 'hdfgroup/hdf5lib:1.10.6' includes these versions as well as h5pyd." - ) - print( - " E.g.: 'docker run --rm -v ~/.hscfg:/root/.hscfg -v ~/data:/data -it hdfgroup/hdf5lib:1.10.6 bash'" - ) + print(cfg.get_see_also(cmd)) + print("") + sys.exit(-1) + # end print_usage # ---------------------------------------------------------------------------------- -def print_config_example(): - print("# default") - print("hs_username = ") - print("hs_password = ") - print("hs_endpoint = http://hsdshdflab.hdfgroup.org") +def main(): + COMPRESSION_FILTERS = ("blosclz", "lz4", "lz4hc", "snappy", "gzip", "zstd") -# print_config_example + s3 = None # S3FS instance + cfg.setitem("append", False, flags=["-a", "--append"], help="append to existing domain") + cfg.setitem("extend", None, flags=["--extend",], choices=["DIMSCALE",], help="extend along given dimensionscale") + cfg.setitem("extend_offset", None, flags=["--extend-offset"], choices=["N",], help="write data at index n along extended dimension") + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite target") + cfg.setitem("nodata", False, flags=["--nodata",], help="do not copy dataset data") + cfg.setitem("z", None, flags=["-z",], choices=["N",], help="apply compression filter to any non-compressed datasets, n: [0-9]") + cfg.setitem("link", None, flags=["--link",], help="Link to dataset data (sourcefile given as /) or s3uri") + cfg.setitem("linkpath", None, flags=["--linkpath",], choices=["PATH_URI",], help="Use the given URI for the link references rather than the src path") + cfg.setitem("compression", None, flags=["--compression",], choices=COMPRESSION_FILTERS, help="use the given compression algorithm for -z option (lz4 is default)") + cfg.setitem("retries", 3, flags=["--retries",], choices=["N",], help="Set number of server retry attempts") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") -# ---------------------------------------------------------------------------------- -def main(): + try: + cmdline_values = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() - COMPRESSION_FILTERS = ("blosclz", "lz4", "lz4hc", "snappy", "gzip", "zstd") - loglevel = logging.ERROR - verbose = False - compression = None - compression_opts = None - append = False - no_clobber = False - extend_dim = None - extend_offset = None - s3path = None - dataload = "ingest" # or None, or "link" - cfg["cmd"] = sys.argv[0].split("/")[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["logfname"] = None - logfname = None - s3 = None # s3fs instance - retries = 10 # number of retry attempts for HSDS requests - link_path = None - ignore_error = False - src_files = [] - argn = 1 - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - - if arg[0] == "-" and len(src_files) > 0: - # options must be placed before filenames - sys.stderr.write("options must precede source files\n") - usage() - sys.exit(-1) - - if len(sys.argv) > argn + 1: - val = sys.argv[argn + 1] - - if arg in ("-v", "--verbose"): - verbose = True - argn += 1 - elif arg == "--link": - if dataload != "ingest": - sys.stderr.write("--nodata flag can't be used with link flag\n") - sys.exit(1) - dataload = "link" - argn += 1 - elif arg == "--linkpath": - link_path = val - argn += 2 - elif arg == "--nodata": - if dataload != "ingest": - sys.stderr.write("--nodata flag can't be used with link flag\n") - sys.exit(1) - dataload = None - argn += 1 - elif arg == "--loglevel": - if val == "debug": - loglevel = logging.DEBUG - elif val == "info": - loglevel = logging.INFO - elif val == "warning": - loglevel = logging.WARNING - elif val == "error": - loglevel = logging.ERROR - else: - sys.stderr.write("unknown loglevel\n") - sys.exit(-1) - argn += 2 - elif arg == "--logfile": - logfname = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg in ("-h", "--help"): - usage() - sys.exit(0) - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg in ("-a", "--append"): - append = True - argn += 1 - elif arg in ("-n", "--no-clobber"): - no_clobber = True - argn += 1 - elif arg == "--extend": - extend_dim = val - argn += 2 - elif arg == "--extend-offset": - extend_offset = int(val) - argn += 2 - elif arg == "--cnf-eg": - print_config_example() - sys.exit(0) - elif arg.startswith("-z"): - compression_opts = 4 - if len(arg) > 2: - try: - compression_opts = int(arg[2:]) - except ValueError: - sys.stderr.write("Compression Level must be int between 0 and 9\n") - sys.exit(-1) - if not compression: - compression = "lz4" - argn += 1 - elif arg in ("-c", "--compression"): - if val not in COMPRESSION_FILTERS: - sys.stderr.write("unknown compression filter\n") - usage() - sys.exit(-1) - compression = val - argn += 2 - elif arg == "--retries": - retries = int(val) - argn += 2 - elif arg == "--ignore": - ignore_error = True - argn += 1 - elif arg[0] == "-": - usage() - sys.exit(-1) - else: - src_files.append(arg) - argn += 1 - - if link_path and dataload != "link": - sys.stderr.write("--linkpath option can only be used with --link\n") - sys.exit(-1) - - if extend_offset and extend_dim is None: - sys.stderr.write("--extend-offset option can only be used with --link\n") - sys.exit(-1) - - if extend_dim is not None and dataload == "link": - sys.stderr.write("--extend option can't be used with --link\n") - sys.exit(-1) + if len(cmdline_values) < 2: + usage() + + domain = cmdline_values[-1] + src_files = cmdline_values[:-1] # setup logging - logging.basicConfig( - filename=logfname, - format="%(levelname)s %(asctime)s %(filename)s:%(lineno)d %(message)s", - level=loglevel, - ) - logging.debug("set log_level to {}".format(loglevel)) - - # end arg parsing - logging.info("username: {}".format(cfg["hs_username"])) - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) - logging.info(f"verbose: {verbose}") - - if len(src_files) < 2: - # need at least a src and destination - usage() - sys.exit(-1) - domain = src_files[-1] - src_files = src_files[:-1] + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") + + if cfg["linkpath"] and not cfg["link"]: + abort("--linkpath option can only be used with --link") + + if cfg["extend_offset"] and cfg["extend_dim"] is None: + abort("--extend-offset option can only be used with --link") + + if cfg["extend_dim"] and cfg["link"]: + abort("--extend option can't be used with --link") + + if cfg["nodata"] and cfg["link"]: + abort("--nodata option can't be used with --link") + + if cfg["link"]: + dataload = "link" + elif cfg["nodata"]: + dataload = None + else: + dataload = "ingest" + logging.info(f"source files: {src_files}") logging.info(f"target domain: {domain}") - if len(src_files) > 1 and (domain[0] != "/" or domain[-1] != "/"): - msg = "target must be a folder if multiple source files are provided\n" - sys.stderr.write(msg) - usage() - sys.exit(-1) - - if cfg["hs_endpoint"] is None: - sys.stderr.write("No endpoint given, try -h for help\n") - sys.exit(1) - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) + if len(src_files) > 1 and domain[-1] != "/": + abort("target must be a folder if multiple source files are provided") - # check we have min HDF5 lib version for chunk query - if dataload == "link": + # check we have min HDF5 lib version for link option + if cfg["link"]: logging.info("checking libversion") if ( h5py.version.version_tuple.major == 2 and h5py.version.version_tuple.minor < 10 ): - sys.stderr.write("link option requires h5py version 2.10 or higher\n") - sys.exit(1) + abort("link option requires h5py version 2.10 or higher") + if h5py.version.hdf5_version_tuple < (1, 10, 6): - sys.stderr.write("link option requires hdf5 lib version 1.10.6 or higher\n") - sys.exit(1) + abort("link option requires h5py version 2.10 or higher") + try: @@ -349,7 +179,7 @@ def main(): if src_file.startswith("s3://"): s3path = src_file if not S3FS_IMPORT: - sys.stderr.write("Install S3FS package to load s3 files\n") + abort("Install S3FS package to load s3 files") sys.exit(1) if not s3: @@ -364,29 +194,27 @@ def main(): try: fin = h5py.File(s3.open(src_file, "rb"), moe="r") except IOError as ioe: - logging.error(f"Error opening file {src_file}: {ioe}") - sys.exit(1) + abort(f"Error opening file {src_file}: {ioe}") + else: - if dataload == "link": - if op.isabs(src_file) and not link_path: - sys.stderr.write( - "source file must be s3path (for HSDS using S3 storage) or relative path from server root directory (for HSDS using posix storage)\n" - ) - sys.exit(1) + if cfg["link"]: + if op.isabs(src_file) and not cfg["linkpath"]: + msg = "source file must be s3path (for HSDS using S3 storage) or relative path from server " + msg += "root directory (for HSDS using posix storage)" + abort(msg) s3path = src_file else: s3path = None try: fin = h5py.File(src_file, mode="r") except IOError as ioe: - logging.error(f"Error opening file {src_file}: {ioe}") - sys.exit(1) + abort(f"Error opening file {src_file}: {ioe}") # create the output domain try: - if append: + if cfg["append"]: mode = "a" - elif no_clobber: + elif cfg["no_clobber"]: mode = "x" else: mode = "w" @@ -396,53 +224,55 @@ def main(): "endpoint": cfg["hs_endpoint"], "bucket": cfg["hs_bucket"], "mode": mode, - "retries": retries, + "retries": cfg["retries"], } fout = h5pyd.File(tgt, **kwargs) except IOError as ioe: if ioe.errno == 404: - logging.error(f"Domain: {tgt} not found") + abort(f"Domain: {tgt} not found") elif ioe.errno == 403: - logging.error(f"No write access to domain: {tgt}") + abort(f"No write access to domain: {tgt}") else: - logging.error(f"Error creating file {tgt}: {ioe}") - sys.exit(1) + abort(f"Error creating file {tgt}: {ioe}") - if link_path: + if cfg["linkpath"]: # now that we have a handle to the source file, # repurpose s3path to the s3uri that will actually get stored # in the target domain - s3path = link_path + s3path = cfg["linkpath"] - if not append and no_clobber: - # no need to check for clobber if not in append mode + if cfg["no_clobber"]: + if cfg["append"]: + # no need to check for clobber if not in append mode + no_clobber = False + else: + no_clobber = True + else: no_clobber = False # do the actual load kwargs = { - "verbose": verbose, + "verbose": cfg["verbose"], "dataload": dataload, "s3path": s3path, - "compression": compression, - "compression_opts": compression_opts, - "append": append, - "extend_dim": extend_dim, - "extend_offset": extend_offset, - "verbose": verbose, - "ignore_error": ignore_error, + "compression": cfg["compression"], + "compression_opts": cfg["z"], + "append": cfg["append"], + "extend_dim": cfg["extend_dim"], + "extend_offset": cfg["extend_offset"], + "ignore_error": cfg["ignore_error"], "no_clobber": no_clobber - } + } load_file(fin, fout, **kwargs) msg = f"File {src_file} uploaded to domain: {tgt}" logging.info(msg) - if verbose: + if cfg["verbose"]: print(msg) except KeyboardInterrupt: - logging.error("Aborted by user via keyboard interrupt.") - sys.exit(1) + abort("Aborted by user via keyboard interrupt.") # __main__ diff --git a/h5pyd/_apps/hsls.py b/h5pyd/_apps/hsls.py index 1143bb38..579881e2 100644 --- a/h5pyd/_apps/hsls.py +++ b/h5pyd/_apps/hsls.py @@ -16,7 +16,6 @@ cfg = Config() - def intToStr(n): if cfg["human_readable"]: s = "{:,}".format(n) @@ -362,26 +361,29 @@ def visitDomains(domain, depth=1): # # Usage # -def printUsage(): - print("usage: {} [-v] [-h] [--showacls] [--showattrs] [--recursive|-r] [--loglevel debug|info|warning|error] [--logfile ] [-e endpoint] [-u username] [-p password] [--bucket bucketname] domains".format(cfg["cmd"])) - print("example: {} -r -e http://hsdshdflab.hdfgroup.org /shared/tall.h5".format(cfg["cmd"])) +def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() + print("Usage:\n") + print(f" {cmd} [ OPTIONS ] domain") + print(f" {cmd} [ OPTIONS ] folder") + print("") + print("Description:") + print(" List contents of a domain or folder") + print(" domain: HSDS domain (absolute path with or without 'hdf5:// prefix)") + print(" folder: HSDS folder (path as above ending in '/')") print("") + print("Options:") - print(" -v | --verbose :: verbose output") - print(" -H | --human-readable :: with -v, print human readable sizes (e.g. 123M)") - print(" -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org") - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print(" -c | --conf :: A credential and config file") - print(" --showacls :: prints domain ACLs") - print(" --showattrs :: print attributes") - print(" --pattern :: :: list domains that match the given regex") - print(" --query :: list domains where the attributes of the root group match the given query string") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" --recursive, -r :: recursively list sub-folders or sub-groups") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print(f"example: {cmd} -r -e http://hsdshdflab.hdfgroup.org /shared/tall.h5") + print("") + print(cfg.get_see_also(cmd)) + print("") sys.exit() @@ -390,97 +392,42 @@ def printUsage(): # def main(): domains = [] - argn = 1 - depth = 1 - loglevel = logging.ERROR - logfname = None - cfg["verbose"] = False - cfg["showacls"] = False - cfg["showattrs"] = False - cfg["human_readable"] = False - cfg["pattern"] = None - cfg["query"] = None - cfg["cmd"] = sys.argv[0].split('/')[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - if len(sys.argv) > argn + 1: - val = sys.argv[argn + 1] - if arg in ("-r", "--recursive"): - depth = -1 - argn += 1 - elif arg in ("-v", "--verbose"): - cfg["verbose"] = True - argn += 1 - elif arg in ("-H", "--human-readable"): - cfg["human_readable"] = True - argn += 1 - elif arg == "--loglevel": - val = val.upper() - if val == "DEBUG": - loglevel = logging.DEBUG - elif val == "INFO": - loglevel = logging.INFO - elif val in ("WARN", "WARNING"): - loglevel = logging.WARNING - elif val == "ERROR": - loglevel = logging.ERROR - else: - printUsage() - argn += 2 - elif arg == '--logfile': - logfname = val - argn += 2 - elif arg in ("-showacls", "--showacls"): - cfg["showacls"] = True - argn += 1 - elif arg in ("-showattrs", "--showattrs"): - cfg["showattrs"] = True - argn += 1 - elif arg in ("-h", "--help"): - printUsage() - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg == "--pattern": - cfg["pattern"] = val - argn += 2 - elif arg == "--query": - cfg["query"] = val - argn += 2 - - elif arg[0] == '-': - printUsage() - else: - domains.append(arg) - argn += 1 - # setup logging - logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', - level=loglevel) - logging.debug("set log_level to {}".format(loglevel)) + # additional options + cfg.setitem("showacls", False, flags=["--showacls",], help="display domain ACLs") + cfg.setitem("showattrs", False, flags=["--showattrs",], help="display domain attributes") + cfg.setitem("pattern", None, flags=["--pattern",], choices=["REGEX",], help="list domains that match the given regex") + cfg.setitem("query", None, flags=["--query",], choices=["QUERY",], help="list domains where the attributes of the root group match the given query string") + cfg.setitem("recursive", False, flags=["-r", "--recursive"], help="recursively list sub-folders or sub-groups") + cfg.setitem("human_readable", False, flags=["-H", "--human-readable"], help="with -v, print human readable sizes (e.g. 123M)") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") + + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() if len(domains) == 0: - # add top-level domain + # need a domain - use root domains.append("/") + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") + for domain in domains: + if cfg["recursive"]: + depth = -1 + else: + depth = 1 + if domain.endswith('/'): # given a folder path count = visitDomains(domain, depth=depth) - print("{} items".format(count)) + print(f"{count} items") else: try: @@ -490,24 +437,24 @@ def main(): print("Username/Password missing or invalid") continue if ioe.errno == 403: - print("No permission to read domain: {}".format(domain)) + print(f"No permission to read domain: {domain}") continue elif ioe.errno == 404: - print("Domain {} not found".format(domain)) + print(f"Domain {domain} not found") continue elif ioe.errno == 410: - print("Domain {} has been removed".format(domain)) + print(f"Domain {domain} has been removed") continue else: - print("Unexpected error: {}".format(ioe)) + print(f"Unexpected error: {ioe}") continue grp = f['/'] if grp is None: - print("{}: No such domain".format(domain)) + print(f"{domain}: No such domain") domain += '/' count = visitDomains(domain, depth=depth) - print("{} items".format(count)) + print(f"{count} items") continue dump('/', grp) diff --git a/h5pyd/_apps/hsmv.py b/h5pyd/_apps/hsmv.py index b780b604..3713327a 100755 --- a/h5pyd/_apps/hsmv.py +++ b/h5pyd/_apps/hsmv.py @@ -13,13 +13,7 @@ import sys import logging import os.path as op - -try: - import h5pyd -except ImportError as e: - sys.stderr.write("ERROR : %s : install it to use this utility...\n" % str(e)) - sys.exit(1) - +import h5pyd if __name__ == "__main__": from config import Config @@ -28,36 +22,33 @@ cfg = Config() - - #---------------------------------------------------------------------------------- def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() print("Usage:\n") - print((" {} [ OPTIONS ] source_domain des_domain".format(cfg["cmd"]))) - print((" {} [ OPTIONS ] source_domain folder".format(cfg["cmd"]))) + print(f" {cmd} [ OPTIONS ] source_domain des_domain") + print(f" {cmd} [ OPTIONS ] source_domain folder") print("") print("Description:") print(" Move domain from one location to another") print(" source_domain: domain to be moved ") - print(" des_domain: desttnation domain") + print(" des_domain: destination domain") print(" folder: destination folder (Unix style ending in '/')") print("") - print("Example:") - print(" hsmv /home/myfolder/file1.h5 /home/myfolder/file2.h5") - print("") print("Options:") - print(" -v | --verbose :: verbose output") - print(" -e | --endpoint :: The HDF Server endpoint, e.g. http://hsdshdflab.hdfgroup.org") - print(" -u | --user :: User name credential") - print(" -p | --password :: Password credential") - print(" -c | --conf :: A credential and config file") - print(" -n | --no-clobber :: Do not overwrite existing domains") - print(" --cnf-eg :: Print a config file and then exit") - print(" --logfile :: logfile path") - print(" --loglevel debug|info|warning|error :: Change log level") - print(" --bucket :: Storage bucket") - print(" -h | --help :: This message.") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") print("") + print("Examples:") + print(f" {cmd} /home/myfolder/file1.h5 /home/myfolder/file2.h5") + print(f" {cmd} /home/myfolder/file2.h5 /home/anotherfolder/") + print("") + print(cfg.get_see_also(cmd)) + print("") + sys.exit(-1) #end print_usage #---------------------------------------------------------------------------------- @@ -85,7 +76,7 @@ def getFile(domain, mode="r"): password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] - fh = h5pyd.File(domain, mode='r', endpoint=endpoint, username=username, + fh = h5pyd.File(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket, use_cache=True) return fh @@ -106,7 +97,6 @@ def createFile(domain, linked_domain=None, no_clobber=False): return fh - def deleteDomain(domain, keep_root=False): # get handle to parent folder @@ -126,119 +116,55 @@ def deleteDomain(domain, keep_root=False): hparent = getFolder(parent_domain, mode='a') except IOError as oe: if oe.errno == 404: # Not Found - sys.exit("Parent domain: {} not found".format(parent_domain)) + sys.exit(f"Parent domain: {parent_domain} not found") elif oe.errno == 401: # Unauthorized sys.exit("Authorization failure") elif oe.errno == 403: # Forbidden sys.exit("Not allowed") else: - sys.exit("Unexpected error: {}".format(oe)) + sys.exit(f"Unexpected error: {oe}") if base_name not in hparent: # note - this may happen if the domain was recently created and not # yet synced to S3 - sys.exit("domain: {} not found".format(domain)) + sys.exit(f"domain: {domain} not found") # delete the domain hparent.delete_item(base_name, keep_root=keep_root) if cfg["verbose"]: if domain.endswith('/'): - print("Folder: {} deleted".format(domain)) + print(f"Folder: {domain} deleted") else: - print("Domain: {} deleted".format(domain)) + print(f"Domain: {domain} deleted") def main(): - loglevel = logging.ERROR - verbose = False - cfg["cmd"] = sys.argv[0].split('/')[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["logfname"] = None - logfname=None - no_clobber = False - - src_files = [] - argn = 1 - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - - if arg[0] == '-' and len(src_files) > 0: - # options must be placed before filenames - print("options must precead source files") - usage() - sys.exit(-1) - if len(sys.argv) > argn + 1: - val = sys.argv[argn+1] - if arg in ("-v", "--verbose"): - verbose = True - argn += 1 - elif arg == "--loglevel": - if val == "debug": - loglevel = logging.DEBUG - elif val == "info": - loglevel = logging.INFO - elif val == "warning": - loglevel = logging.WARNING - elif val == "error": - loglevel = logging.ERROR - else: - print("unknown loglevel") - usage() - sys.exit(-1) - argn += 2 - elif arg == '--logfile': - logfname = val - argn += 2 - elif arg in ("-h", "--help"): - usage() - sys.exit(0) - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = val - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = val - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = val - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg in ("-n", "--no-clobber"): - no_clobber = True - argn += 1 - elif arg == '--cnf-eg': - print_config_example() - sys.exit(0) - elif arg[0] == '-': - usage() - sys.exit(-1) - else: - src_files.append(arg) - argn += 1 - - # setup logging - logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(filename)s:%(lineno)d %(message)s', level=loglevel) - logging.debug("set log_level to {}".format(loglevel)) + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite any domains") + cfg.setitem("hs_owner", None, flags=["-o", "--owner"], choices=["OWNER",], help="set owner (must be run as an admin user)") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") - # end arg parsing - logging.info("username: {}".format(cfg["hs_username"])) - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) - logging.info("verbose: {}".format(verbose)) + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() - if len(src_files) < 2: - # need at least a src and destination + if len(domains) < 2: + # need at least source and target usage() - sys.exit(-1) - src_domain = src_files[0] - des_domain = src_files[1] + + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") + + src_domain = domains[0] + des_domain = domains[1] if src_domain[0] != '/' or des_domain[0] != '/': print("absolute paths must be used") usage() - sys.exit(-1) if src_domain[-1] == '/': print("folder can not be used for src domain") @@ -247,50 +173,41 @@ def main(): # add on the filename from source to folder path des_domain += op.basename(src_domain) - - logging.info("source domain: {}".format(src_domain)) - logging.info("target domain: {}".format(des_domain)) - - - if cfg["hs_endpoint"] is None: - logging.error('No endpoint given, try -h for help\n') - sys.exit(1) - logging.info("endpoint: {}".format(cfg["hs_endpoint"])) + logging.info(f"source domain: {src_domain}") + logging.info(f"target domain: {des_domain}") # get root id of source file try: - fin = getFile(src_domain, "r") + fin = getFile(src_domain) except IOError as oe: # this will fail if we try to open a folder - msg = "Error: {} getting domain: {}".format(oe.errno, src_domain) + msg = f"Error: {oe.errno} getting domain: {src_domain}" logging.error(msg) - print(msg) sys.exit(str(oe)) - logging.info("src root id: {}".format(fin.id.id)) + logging.info(f"src root id: {fin.id.id}") fin.close() # create a new file using the src domain for the root group + no_clobber = cfg["no_clobber"] try: fout = createFile(des_domain, linked_domain=src_domain, no_clobber=no_clobber) except IOError as oe: - msg = "Error: {} creating domain: {}".format(oe.errno, des_domain) + msg = f"Error: {oe.errno} creating domain: {des_domain}" logging.error(msg) - print(msg) sys.exit(str(oe)) - logging.info("des root id: {}".format(fout.id.id)) + cfg.print(f"{src_domain} copied to {des_domain}") + logging.info(f"des root id: {fout.id.id}") try: deleteDomain(src_domain, keep_root=True) - logging.info("domain: {} removed".format(src_domain)) + logging.info(f"domain: {src_domain} removed") except IOError as oe: - msg = "Error: {} removing source domain: {}".format(oe.errno, src_domain) + msg = f"Error: {oe.errno} removing source domain: {src_domain}" logging.error(msg) - print(msg) sys.exit(str(oe)) - # __main__ if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hsstat.py b/h5pyd/_apps/hsstat.py new file mode 100644 index 00000000..59c2187e --- /dev/null +++ b/h5pyd/_apps/hsstat.py @@ -0,0 +1,207 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # +# Utilities. The full HSDS copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## + +import sys +import logging +from datetime import datetime +import h5pyd + +if __name__ == "__main__": + from config import Config +else: + from .config import Config + +# +# Print objects in a domain in the style of the hsls utilitiy +# + +cfg = Config() + +# +# log error and abort app +# +def abort(msg): + logging.error(msg) + if cfg["logfile"]: + # write to stderr if we are output logs to a file + sys.stderr.write(msg + "\n") + logging.error("exiting program with return code -1") + sys.exit(-1) + +# +# Usage +# +def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() + print("Usage:\n") + print(f" {cmd} [ OPTIONS ] domain") + print(f" {cmd} [ OPTIONS ] folder") + print("") + print("Description:") + print(" Get domain stats for domain or folder") + print(" domain: HSDS domain (absolute path with or without 'hdf5:// prefix)") + print(" folder: HSDS folder (path as above ending in '/')") + print("") + + print("Options:") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print("examples:") + print(f" {cmd} -e http://hsdshdflab.hdfgroup.org") + print(f" {cmd} -e http://hsdshdflab.hdfgroup.org /shared/tall.h5") + print(cfg.get_see_also(cmd)) + print("") + sys.exit() + + +def format_size(n): + if n is None or n == " ": + return " " * 8 + symbol = " " + if not cfg["human_readable"]: + return str(n) + # convert to common storage unit + for s in ("B", "K", "M", "G", "T"): + if n < 1024: + symbol = s + break + n /= 1024 + if symbol == "B": + return "{:}B".format(n) + else: + return "{:.1f}{}".format(n, symbol) + + +def getDomainInfo(domain, cfg): + """get info about the domain and print""" + username = cfg["hs_username"] + password = cfg["hs_password"] + endpoint = cfg["hs_endpoint"] + bucket = cfg["hs_bucket"] + if "rescan" in cfg and cfg["rescan"]: + mode = "r+" # need write intent + else: + mode = "r" + + if domain.endswith("/"): + is_folder = True + else: + is_folder = False + + try: + if is_folder: + f = h5pyd.Folder( + domain, + mode=mode, + endpoint=endpoint, + username=username, + password=password, + bucket=bucket, + use_cache=True, + ) + else: + f = h5pyd.File( + domain, + mode=mode, + endpoint=endpoint, + username=username, + password=password, + bucket=bucket, + use_cache=False, + ) + except IOError as oe: + if oe.errno in (404, 410): # Not Found + abort(f"domain: {domain} not found") + elif oe.errno == 401: # Unauthorized + abort("Authorization failure") + elif oe.errno == 403: # Forbidden + abort("Not allowed") + else: + abort(f"Unexpected error: {oe}") + + timestamp = datetime.fromtimestamp(int(f.modified)) + if not is_folder and f.last_scan: + last_scan = datetime.fromtimestamp(int(f.last_scan)) + else: + last_scan = None + + if is_folder: + print(f"folder: {domain}") + print(f" owner: {f.owner}") + print(f" last modified: {timestamp}") + else: + if "rescan" in cfg and cfg["rescan"]: + f.run_scan() + + # report HDF objects (groups, datasets, and named datatypes) vs. allocated chunks + num_objects = f.num_groups + f.num_datatypes + f.num_datasets + if f.num_chunks > 0: + num_chunks = f.num_chunks + else: + # older storeinfo format doesn't have num_chunks, so calculate + num_chunks = f.num_objects - num_objects + + print(f"domain: {domain}") + print(f" owner: {f.owner}") + print(f" id: {f.id.id}") + print(f" last modified: {timestamp}") + if last_scan: + print(f" last scan: {last_scan}") + if f.md5_sum: + print(f" md5 sum: {f.md5_sum}") + print(f" total_size: {format_size(f.total_size)}") + print(f" allocated_bytes: {format_size(f.allocated_bytes)}") + if f.metadata_bytes: + print(f" metadata_bytes: {format_size(f.metadata_bytes)}") + if f.linked_bytes: + print(f" linked_bytes: {format_size(f.linked_bytes)}") + print(f" num objects: {num_objects}") + print(f" num chunks: {num_chunks}") + if f.num_linked_chunks: + print(f" linked chunks: {f.num_linked_chunks}") + + f.close() +# +# Main +# +def main(): + domains = [] + + cfg.setitem("human_readable", False, flags=["-H", "--human-readable"], help="print human readable sizes (e.g. 123M)") + cfg.setitem("rescan", False, flags=["--rescan",], help="refresh domain stats (for use when domain is provided)") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") + + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() + + # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() + logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) + logging.debug(f"set log_level to {loglevel}") + + if not domains: + abort("no domain provided!") + + for domain in domains: + getDomainInfo(domain, cfg) + + +if __name__ == "__main__": + main() diff --git a/h5pyd/_apps/hstouch.py b/h5pyd/_apps/hstouch.py index 1d463130..3bc83120 100644 --- a/h5pyd/_apps/hstouch.py +++ b/h5pyd/_apps/hstouch.py @@ -18,7 +18,7 @@ def getFolder(domain): password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] - #print("getFolder", domain) + logging.debug(f"getFolder({domain})") dir = h5py.Folder(domain, endpoint=endpoint, username=username, password=password, bucket=bucket) return dir @@ -27,24 +27,23 @@ def createFolder(domain): password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] - #print("getFolder", domain) owner = None if "hs_owner" in cfg: owner=cfg["hs_owner"] + logging.debug(f"createFolder({domain})") dir = h5py.Folder(domain, mode='x', endpoint=endpoint, username=username, password=password, bucket=bucket, owner=owner) return dir -def getFile(domain): +def getFile(domain, mode="a"): username = cfg["hs_username"] password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] - #print("getFile", domain) - fh = h5py.File(domain, mode='r', endpoint=endpoint, username=username, password=password, bucket=bucket) + logging.debug(f"getFile(domain={domain}, mode={mode})") + fh = h5py.File(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket) return fh def createFile(domain): - #print("createFile", domain) username = cfg["hs_username"] password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] @@ -52,93 +51,118 @@ def createFile(domain): owner = None if "hs_owner" in cfg: owner=cfg["hs_owner"] + logging.debug(f"createFile({domain})") fh = h5py.File(domain, mode='x', endpoint=endpoint, username=username, password=password, bucket=bucket, owner=owner) return fh - - -def touchDomain(domain): - - make_folder = False +def getParentDomain(domain): if domain[-1] == '/': - make_folder = True - domain = domain[:-1] + if len(domain) > 1: + domain = domain[:-1] + parent_domain = op.dirname(domain) + if not parent_domain.endswith("/"): + parent_domain += "/" + return parent_domain +def touchDomain(domain): # get handle to parent folder - parent_domain = op.dirname(domain) + parent_domain = getParentDomain(domain) - if parent_domain == '/': - #if cfg["hs_username"] != "admin": - # sys.exit("Only admin user can create top-level domains") - if not make_folder: - sys.exit("Only folders can be created as a top-level domain") + if parent_domain == "/": + if not domain.endswith("/"): + msg = "Only folders can be created as a top-level domain" + logging.error(msg) + sys.exit(msg) if len(domain) < 4: - sys.exit("Top-level folders must be at least three characters") + msg = "Top-level folders must be at least three characters" + logging.error(msg) + sys.exit(msg) else: - if not parent_domain.endswith('/'): - parent_domain += '/' try: getFolder(parent_domain) except IOError as oe: #print("errno:", oe.errno) if oe.errno in (404, 410): # Not Found - sys.exit("Parent domain: {} not found".format(parent_domain)) + sys.exit(f"Parent domain: {parent_domain} not found") elif oe.errno == 401: # Unauthorized sys.exit("Authorization failure") elif oe.errno == 403: # Forbidden sys.exit("Not allowed") else: - sys.exit("Unexpected error: {}".format(oe)) + sys.exit(f"Unexpected error: {oe}") hdomain = None try: - hdomain = getFile(domain) + if domain.endswith("/"): + hdomain = getFolder(domain) + else: + hdomain = getFile(domain, mode="r") except IOError as oe: - #print("errno:", oe.errno) if oe.errno in (404, 410): # Not Found pass # domain not found else: - sys.exit("Unexpected error: {}".format(oe)) + sys.exit(f"Unexpected error: {oe}") - if hdomain: - if not make_folder: + if hdomain is not None: + logging.debug(f"domain: {domain} exists") + if domain.endswith("/"): + sys.exit("Can not update timestamp of folder object") + else: try: + # get domain for updating + hdomain = getFile(domain, mode="a") r = hdomain['/'] # create/update attribute to update lastModified timestamp of domain r.attrs["hstouch"] = 1 + cfg.print(f"updated timestamp for domain: {domain}") hdomain.close() except IOError as oe: - sys.exit("Got error updating domain: {}".format(oe)) - else: - sys.exit("Can not update timestamp of folder object") - hdomain.close() + msg = f"Got error updating domain: {oe}" + logging.error(msg) + sys.exit(msg) else: # create domain - if not make_folder: + if not domain.endswith("/"): try: fh = createFile(domain) - if cfg["verbose"]: - print("domain created: {}, root id: {}".format(domain, fh.id.id)) + cfg.print(f"domain created: {domain}, root id: {fh.id.id}") fh.close() except IOError as oe: - sys.exit("Got error updating domain: {}".format(oe)) + sys.exit(f"Got error updating domain: {oe}") else: # make folder try: - fh = createFolder(domain + '/') - if cfg["verbose"]: - print("folder created", domain + '/') + fh = createFolder(domain) + cfg.print(f"folder created {domain}") fh.close() except IOError as oe: - sys.exit("Got error updating domain: {}".format(oe)) + sys.exit(f"Got error updating domain: {oe}") # # Usage # -def printUsage(): - print("usage: {} [-v] [-e endpoint] [-u username] [-p password] [-o owner] [--loglevel debug|info|warning|error] [--logfile ] [--bucket ] domains".format(cfg["cmd"])) - print("example: {} -e http://hsdshdflab.hdfgroup.org /home/myfolder/emptydomain.h5".format(cfg["cmd"])) +def usage(): + option_names = cfg.get_names() + cmd = cfg.get_cmd() + print("Usage:\n") + print(f" {cmd} [ OPTIONS ] domain") + print(f" {cmd} [ OPTIONS ] folder") + print("") + print("Description:") + print(" Create a new domain or folder") + print(" domain: HSDS domain (absolute path with or without 'hdf5:// prefix)") + print(" folder: HSDS folder (path as above ending in '/')") + print("") + print("Options:") + for name in option_names: + help_msg = cfg.get_help_message(name) + if help_msg: + print(f" {help_msg}") + print("") + print(f"Example: {cmd} hdf5://home/myfolder/emptydomain.h5") + print(cfg.get_see_also(cmd)) + print("") sys.exit() # @@ -146,66 +170,25 @@ def printUsage(): # def main(): domains = [] - argn = 1 - loglevel = logging.ERROR - logfname=None - cfg["cmd"] = sys.argv[0].split('/')[-1] - if cfg["cmd"].endswith(".py"): - cfg["cmd"] = "python " + cfg["cmd"] - cfg["verbose"] = False - - while argn < len(sys.argv): - arg = sys.argv[argn] - val = None - if len(sys.argv) > argn + 1: - val = sys.argv[argn+1] - - if arg in ("-h", "--help"): - printUsage() - elif arg in ("-v", "--verbose"): - cfg["verbose"] = True - argn += 1 - elif arg == "--loglevel": - val = val.upper() - if val == "DEBUG": - loglevel = logging.DEBUG - elif val == "INFO": - loglevel = logging.INFO - elif val in ("WARN", "WARNING"): - loglevel = logging.WARNING - elif val == "ERROR": - loglevel = logging.ERROR - else: - printUsage() - argn += 2 - elif arg in ("-e", "--endpoint"): - cfg["hs_endpoint"] = sys.argv[argn+1] - argn += 2 - elif arg in ("-u", "--username"): - cfg["hs_username"] = sys.argv[argn+1] - argn += 2 - elif arg in ("-p", "--password"): - cfg["hs_password"] = sys.argv[argn+1] - argn += 2 - elif arg in ("-b", "--bucket"): - cfg["hs_bucket"] = val - argn += 2 - elif arg in ("-o", "--owner"): - cfg["hs_owner"] = sys.argv[argn+1] - argn += 2 - elif arg[0] == '-': - printUsage() - else: - domains.append(arg) - argn += 1 + # additional options + cfg.setitem("hs_owner", None, flags=["-o", "--owner"], choices=["OWNER",], help="set owner (must be run as an admin user)") + cfg.setitem("help", False, flags=["-h", "--help"], help="this message") + + try: + domains = cfg.set_cmd_flags(sys.argv[1:]) + except ValueError as ve: + print(ve) + usage() if len(domains) == 0: # need a domain - printUsage() + usage() # setup logging + logfname = cfg["logfile"] + loglevel = cfg.get_loglevel() logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) - logging.debug("set log_level to {}".format(loglevel)) + logging.debug(f"set log_level to {loglevel}") for domain in domains: touchDomain(domain) diff --git a/h5pyd/_apps/utillib.py b/h5pyd/_apps/utillib.py index e42c1012..2063e291 100755 --- a/h5pyd/_apps/utillib.py +++ b/h5pyd/_apps/utillib.py @@ -1027,9 +1027,11 @@ def write_dataset(src, tgt, ctx): print(msg) resize_dataset(tgt, new_extent, axis=0) - if ctx["dataload"] == "link": - # don't write chunks, but update chunktable for chunk ref indirect - if tgt.id.layout and tgt.id.layout["class"] == "H5D_CHUNKED_REF_INDIRECT": + + if tgt.id.layout and tgt.id.layout["class"] != "H5D_CHUNKED": + # this is one of the ref layouts + if tgt.id.layout["class"] == "H5D_CHUNKED_REF_INDIRECT": + # don't write chunks, but update chunktable for chunk ref indirect update_chunktable(src, tgt, ctx) else: pass # skip chunkterator for link option @@ -1061,7 +1063,7 @@ def write_dataset(src, tgt, ctx): for src_s in it: logging.debug(f"src selection: {src_s}") - if rank == 1: + if rank == 1 and isinstance(src_s, slice): start = src_s.start + offset[0] stop = src_s.stop + offset[0] if len(tgt.shape) > rank: diff --git a/setup.py b/setup.py index 7abc66a0..35ae78d0 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ "hscp = h5pyd._apps.hscopy:main", "hsmv = h5pyd._apps.hsmv:main", "hsdiff = h5pyd._apps.hsdiff:main", + "hsstat = h5pyd._apps.hsstat:main", ] }, )