From dbd51a5daf42e128a6017634ad33aff1c3e08ff1 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 10:49:56 +0100
Subject: [PATCH 01/37] Add ci

---
 .travis.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..7f5bf24
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+notifications:
+  slack: wtsi-cgpit:ptUMR1tkNyZJYd9TpGoss8WR
+  email: false
+
+language: python
+python:
+  - "3.6"
+
+install:
+  - pip install nose
+  - pip install progressbar2
+  - pip install xlrd
+
+script:
+  - ./run_tests.sh

From 990695e28d286759465720d2383cbbcc19783ea2 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 10:53:15 +0100
Subject: [PATCH 02/37] vanilla nose

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 7f5bf24..cf25633 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,4 +12,4 @@ install:
   - pip install xlrd
 
 script:
-  - ./run_tests.sh
+  - nosetests

From faac016086fb75aeebd3b03936bc6a6ccf6880eb Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:00:04 +0100
Subject: [PATCH 03/37] Add coverage, aiming for codeclimate reports

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index cf25633..dac145b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,9 +7,9 @@ python:
   - "3.6"
 
 install:
-  - pip install nose
+  - pip install nose coverage
   - pip install progressbar2
   - pip install xlrd
 
 script:
-  - nosetests
+  - nosetests --with-coverage --cover-erase --cover-package=cgp_seq_input_val

From 77400701d71d6d208717194647caa6ddb0fb34c6 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:24:55 +0100
Subject: [PATCH 04/37] Add config for codeclimate

---
 .codeclimate.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .codeclimate.yml

diff --git a/.codeclimate.yml b/.codeclimate.yml
new file mode 100644
index 0000000..fed5c9a
--- /dev/null
+++ b/.codeclimate.yml
@@ -0,0 +1,24 @@
+engines:
+  duplication:
+    enabled: true
+    config:
+      languages:
+      - python
+      #mass_threshold: 30
+  fixme:
+    enabled: true
+  markdownlint:
+    enabled: true
+  pep8:
+    enabled: true
+  radon:
+    enabled: true
+
+ratings:
+  paths:
+  - "**.py"
+  - "**.md"
+
+exclude_paths:
+  - "git-hooks/"
+  - "data/"

From 03cd3d43814ab6a7c7e3b62b880cd4424993d85e Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:25:53 +0100
Subject: [PATCH 05/37] correct yml lint

---
 .codeclimate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index fed5c9a..ab51933 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -1,8 +1,8 @@
 engines:
   duplication:
-    enabled: true
+    enabled: true
     config:
-      languages:
+      languages:
       - python
       #mass_threshold: 30
   fixme:
     enabled: true

From 5a7851e9e58158320f9635b9cfb10d561087f48b Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:32:26 +0100
Subject: [PATCH 06/37] Prevent tests from being evaluated

---
 .codeclimate.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index ab51933..91aab99 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -22,3 +22,4 @@ ratings:
 exclude_paths:
   - "git-hooks/"
   - "data/"
+  - "tests/"

From ac5b208c70995aa6e7568d792b4886982dff701c Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:40:56 +0100
Subject: [PATCH 07/37] Fixes markdown lint warnings

---
 README.md | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 81fdc3f..10b3375 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,8 @@ And a `json` version of the file ready for use by downstream systems.
 
 ### validate_seq_file.py
 
-Takes an interleaved or a pair of paired-fastq files and produces a simple report of:
+Takes an interleaved or a pair of paired-fastq files and produces a simple report
+of:
 
 ```
 {
@@ -53,50 +54,55 @@ Various exceptions can occur for malformed files.
 
 The primary purpose is to confirm Sanger/Illumina 1.8+ quality scores.
 
-#### Why no BAM/CRAM input?
+#### FASTQ not BAM/CRAM
 
 The flow of the service data will require splitting of any multi-lane BAM/CRAM files
 down to the individual lanes, which we would do to interleaved fastq. There is no
 current need to parse BAM/CRAM files to check quality encoding directly as the spec
-technically disallows it. It is possible for BAM files to be incorrectly encoded though.
+technically disallows it. It is possible for BAM files to be incorrectly encoded
+though.
 
+## INSTALL
 
-# INSTALL
-
-Installation is via `easy_install`. Simply execute with the path to the compiled 'egg':
+Installation is via `easy_install`. Simply execute with the path to the compiled
+'egg':
 
 ```bash
 easy_install bundles/cgp_seq_input_val-0.1.0-py3.6.egg
 ```
 
-## Package Dependancies
+### Package Dependancies
 
 `easy_install` will install the relevant dependancies, listed here for convenience:
 
 * [progressbar2](http://progressbar-2.readthedocs.io/en/latest/)
 * [xlrd](https://github.com/python-excel/xlrd)
 
+## Development environment
 
-# Development environment
-This project uses git pre-commit hooks. As these will execute on your system it is entirely up to you if you activate them.
+This project uses git pre-commit hooks. As these will execute on your system it
+is entirely up to you if you activate them.
 
-If you want tests, coverage reports and lint-ing to automatically execute before a commit you can activate them by running:
+If you want tests, coverage reports and lint-ing to automatically execute before
+a commit you can activate them by running:
 
 ```
 git config core.hooksPath git-hooks
 ```
 
-Only a test failure will block a commit, lint-ing is not enforced (but please consider following the guidance).
+Only a test failure will block a commit, lint-ing is not enforced (but please consider
+following the guidance).
-You can run the same checks manually without a commit by executing the following in the base of the clone:
+You can run the same checks manually without a commit by executing the following
+in the base of the clone:
 
 ```bash
 ./run_tests.py
 ```
 
-## Development Dependencies
+### Development Dependencies
 
-### Setup VirtualEnv:
+#### Setup VirtualEnv
 
 ```
 cd $PROJECTROOT
 hash virtualenv || pip3 install virtualenv
 virtualenv -p python3 env
 env/bin/pip install progressbar2
 env/bin/pip install xlrd
 ```
 
 For testing/coverage (`./run_tests.sh`)
 
 ```
 env/bin/pip install nose
 env/bin/pip install coverage
 env/bin/pip install pylint
 ```
 
-__Also see [Package Dependancies](#package-dependancies)__
+__Also see__ [Package Dependancies](#package-dependancies)
 
-## Cutting a release
+### Cutting a release
 
-__Make sure the version is incremented in ./setup.py__
+__Make sure the version is incremented__ in `./setup.py`
 
 The release is handled by setuptools:

From dafa87439f532e9f492ec157e8fc5bc4c3b25c0e Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 12:05:07 +0100
Subject: [PATCH 08/37] cleanup style and lint errors

---
 .codeclimate.yml | 1 +
 cgp_seq_input_val/cliutil.py | 1 +
 cgp_seq_input_val/error_classes.py | 4 +
 cgp_seq_input_val/fastq_read.py | 12 ++-
 cgp_seq_input_val/file_meta.py | 10 +-
 cgp_seq_input_val/manifest.py | 144 +++++++++++++++++++----------
 cgp_seq_input_val/seq_validator.py | 8 +-
 setup.py | 5 +-
 8 files changed, 121 insertions(+), 64 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 91aab99..70efdda 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -11,6 +11,7 @@ engines:
     enabled: true
   pep8:
     enabled: true
+    max-line-length: 120
   radon:
     enabled: true
diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py
index 6632a94..f82f1d7 100644
--- a/cgp_seq_input_val/cliutil.py
+++ b/cgp_seq_input_val/cliutil.py
@@ -1,6 +1,7 @@
 """General command line utility functions"""
 import os
 
+
 def extn_check(parser, choices, fname, readable=False):
     """Checks file extensions fit expected sets
 
diff --git a/cgp_seq_input_val/error_classes.py b/cgp_seq_input_val/error_classes.py
index 52664b5..6a9eab7 100644
--- a/cgp_seq_input_val/error_classes.py
+++ b/cgp_seq_input_val/error_classes.py
@@ -1,21 +1,25 @@
 """Package error classes"""
 
+
 class SeqValidationError(RuntimeError):
     """Exception for failures to validate data in the manifest."""
     pass
 
+
 class ConfigError(RuntimeError):
     """
     Exception for errors in the values of config/*.json files.
     """
     pass
 
+
 class ParsingError(RuntimeError):
     """
     Exception for errors in the naming of the config/*.json files.
     """
     pass
 
+
 class ValidationError(RuntimeError):
     """
     Exception for failures to validate data in the manifest.
diff --git a/cgp_seq_input_val/fastq_read.py b/cgp_seq_input_val/fastq_read.py
index a485763..620e62d 100644
--- a/cgp_seq_input_val/fastq_read.py
+++ b/cgp_seq_input_val/fastq_read.py
@@ -6,10 +6,11 @@
 from cgp_seq_input_val.error_classes import SeqValidationError
 
+
 class FastqRead(object):
     """
-    Models and validates a fastq read, calling print will produce a 4 line record
-    regardess of original format.
+    Models and validates a fastq read, calling print will produce a 4 line
+    record regardess of original format.
Inputs: fp: open file pointer to get next read from @@ -50,7 +51,7 @@ def __init__(self, fq_fh, line_no_in, curr_line): self.seq = seq self.qual = qual self.file_pos = (line_no_in, line_no) - self.last_line = curr_line # as we need to pass this back + self.last_line = curr_line # as we need to pass this back self.name = None self.end = None @@ -69,8 +70,9 @@ def validate(self, filename): """ match = re.match(r'@(\S+)/([12])', self.header) if match is None: - raise SeqValidationError("Sequence record header must begin with '@' \ - one non-whitespace character and '/[12]', line %d of %s" + raise SeqValidationError("Sequence record header must begin with \ + '@' one non-whitespace character and \ + '/[12]', line %d of %s" % (self.file_pos[0], filename)) groups = match.groups() self.name = groups[0] diff --git a/cgp_seq_input_val/file_meta.py b/cgp_seq_input_val/file_meta.py index bb72887..bf8a7b2 100644 --- a/cgp_seq_input_val/file_meta.py +++ b/cgp_seq_input_val/file_meta.py @@ -4,6 +4,7 @@ import os + class FileMeta(object): """ Oject to hold file metadata as a set of attributes with small set of @@ -26,7 +27,8 @@ def __init__(self, headers, details, rel_path): def get_path(self, f_type): """ Returns the path of a file after pre-pending with the 'rel_path' - All file entries in the manifest should be relative to the manifest itself. + All file entries in the manifest should be relative to the manifest + itself. """ item = self.attributes[f_type] if item == '.': @@ -45,9 +47,11 @@ def test_files(self, line): continue if not os.path.isfile(full_path): - raise FileValidationError("'%s' is not a file ('%s' - line %d)." % (item, f_type, line)) + raise FileValidationError("'%s' is not a file ('%s' - line %d)." + % (item, f_type, line)) if not os.path.getsize(full_path): - raise FileValidationError("'%s' is an empty file ('%s' - line %d)." % (item, f_type, line)) + raise FileValidationError("'%s' is an empty file ('%s' - line %d)." + % (item, f_type, line)) class FileValidationError(RuntimeError): """ diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py index dd1ee22..369ea3b 100644 --- a/cgp_seq_input_val/manifest.py +++ b/cgp_seq_input_val/manifest.py @@ -15,8 +15,11 @@ from cgp_seq_input_val.error_classes import ConfigError, ParsingError, ValidationError from cgp_seq_input_val.file_meta import FileMeta -VAL_LIM_ERROR = "Only %d sample(s) with a value of '%s' is allowed in column '%s' when rows grouped by '%s'" -VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either is present, check body.validate." +VAL_LIM_ERROR = "Only %d sample(s) with a value of '%s' is allowed in column \ + '%s' when rows grouped by '%s'" +VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either \ + is present, check body.validate." 
+ def uuid4_chk(uuid_str): """Tests validity of uuid""" @@ -26,6 +29,7 @@ def uuid4_chk(uuid_str): return False return val.hex == uuid_str.replace('-', '') + def normalise(args): """ Takes the arguments captured by the normalise_manifest.py executable @@ -35,15 +39,18 @@ def normalise(args): # Extensions are checked by argparse if args.input.endswith('tsv') is True: if args.output is None: - print("\nINFO: input and output will be same file, no action required\n", file=sys.stderr) + print("\nINFO: input and output will be same file, no action \ + required\n", file=sys.stderr) return True else: if os.path.exists(args.output): if os.path.samefile(args.input, args.output): - print("\nINFO: input and output point to the same file, no action required", file=sys.stderr) + print("\nINFO: input and output point to the same file, no \ + action required", file=sys.stderr) return True # anything else is a copy - print("\nINFO: input copied to output, no format conversion required.", file=sys.stderr) + print("\nINFO: input copied to output, no format conversion \ + required.", file=sys.stderr) shutil.copyfile(args.input, args.output, follow_symlinks=True) return True @@ -53,10 +60,11 @@ def normalise(args): manifest = Manifest(args.input) manifest.convert_by_extn(args.output) + def evaulate_value_limits(field, chk, limit_chks): """ - Handles validation of fields where presence of partiular value has a max occurence - within a grouping of rows + Handles validation of fields where presence of partiular value has a max + occurence within a grouping of rows """ for val_limit in chk: if 'limit' not in val_limit: @@ -73,11 +81,15 @@ def evaulate_value_limits(field, chk, limit_chks): field, val_limit['limit_by'])) + class Manifest(object): """ Top level object used to validate a manifest TSV file. - This runs validation of the header and body in turn rasing execptions as appropriate. - Configuration is handled via the json files found in the config sub directory. + This runs validation of the header and body in turn rasing execptions as + appropriate. + + Configuration is handled via the json files found in the config sub + directory. """ def __init__(self, infile): self.infile = infile @@ -102,7 +114,10 @@ def _csv_to_tsv(self, ofh): def _excel_to_tsv(self, ofh): xlrd = import_module('xlrd') - book = xlrd.open_workbook(self.infile, formatting_info=False, on_demand=True, ragged_rows=True) + book = xlrd.open_workbook(self.infile, + formatting_info=False, + on_demand=True, + ragged_rows=True) sheet = book.sheet_by_name('For entry') for r in range(0, sheet.nrows): simplerow = [] @@ -115,16 +130,16 @@ def _excel_to_tsv(self, ofh): print("\t".join(simplerow), file=ofh) def convert_by_extn(self, outfile): - """Uses the input file extension to determine the correct file conversion - routine. Output is always tsv file. Expects the output file name extension - to have been checked in advance. + """ + Uses the input file extension to determine the correct file conversion + routine. Output is always tsv file. Expects the output file name + extension to have been checked in advance. 
""" with open(outfile, 'w') as ofh: convertor = getattr(self, '_' + self.informat + '_to_tsv') convertor(ofh) - def validate(self, checkFiles=False): """ Runs the actual validation of a manifest: @@ -135,7 +150,8 @@ def validate(self, checkFiles=False): - Validate body """ if self.informat != 'tsv': - raise ValueError('Manifest.validate only accepts files of type "tsv"') + raise ValueError('Manifest.validate only accepts files of type \ + "tsv"') # Generate the header object self.header = Header(self.infile) self.config = self.header.get_config() @@ -176,9 +192,11 @@ def write(self, outdir): def get_uuid(self): """Get the uuid for this manifest""" if not self.header: - raise ValidationError('manifest.validate() must be called before manifest.get_uuid()') + raise ValidationError('manifest.validate() must be called before \ + manifest.get_uuid()') return self.header.uuid + class Header(object): """ Object to load and validate the header section of a manifest @@ -223,7 +241,9 @@ def get_config(self, cfg_file=None): config = None if cfg_file is None: resource = 'config/%s-%s.json' % (self.type, self.version) - resource_as_string = resource_string(__name__, resource).decode("utf-8", "strict") + resource_as_string = resource_string(__name__, + resource).decode("utf-8", + "strict") config = json.loads(resource_as_string) # for error messages cfg_file = resource_filename(__name__, resource) @@ -233,11 +253,11 @@ def get_config(self, cfg_file=None): config = json.load(j) if config['type'] != self.type: - raise ParsingError("Filename (%s) does not match 'type' (%s) within file" - % (cfg_file, config['type'])) + raise ParsingError("Filename (%s) does not match 'type' (%s) \ + within file" % (cfg_file, config['type'])) if config['version'] != self.version: - raise ParsingError("Filename (%s) does not match 'version' (%s) within file" - % (cfg_file, config['version'])) + raise ParsingError("Filename (%s) does not match 'version' (%s) \ + within file" % (cfg_file, config['version'])) self.validate_json(config) return config @@ -253,37 +273,40 @@ def validate_json(self, config): - body content validated by it's own class. 
""" if 'header' not in config: - raise ConfigError("header (dict/hash) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header (dict/hash) not found in json file: \ + %s-%s.json" % (self.type, self.version)) if 'expected' not in config['header']: - raise ConfigError("header.expected (list/array) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header.expected (list/array) not found in json \ + file: %s-%s.json" % (self.type, self.version)) if 'required' not in config['header']: - raise ConfigError("header.required (list/array) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header.required (list/array) not found in json \ + file: %s-%s.json" % (self.type, self.version)) if 'validate' not in config['header']: - raise ConfigError("header.validate (dict/hash) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header.validate (dict/hash) not found in json \ + file: %s-%s.json" % (self.type, self.version)) if 'body' not in config: - raise ConfigError("body (dict/hash) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("body (dict/hash) not found in json file: \ + %s-%s.json" % (self.type, self.version)) + def fields_exist(self, expected): """ - Checks all field that are expected to exist in the header of this type+version - of the manifest. It is not checking for values, just the expected elements. - These are detailed in the header.expected element of the json file. - Adds these to the 'items' dict of the header object. + Checks all field that are expected to exist in the header of this + type+version of the manifest. It is not checking for values, just the + expected elements. These are detailed in the header.expected element of + the json file. Adds these to the 'items' dict of the header object. """ found = set(self._all_items.keys()) expected_fields = set(expected) unexpected = found.difference(expected_fields) - if unexpected: # empty sequences are false, don't use "len() > 0" - raise ValidationError("The following unexpected fields were found in the header of your file:\n\t'" + if unexpected: + raise ValidationError("The following unexpected fields were found \ + in the header of your file:\n\t'" + "'\n\t'".join(unexpected) + "'") missing_fields = expected_fields.difference(found) if missing_fields: - raise ValidationError("The following expected fields were missing from the header of your file:\n\t'" + raise ValidationError("The following expected fields were missing \ + from the header of your file:\n\t'" + "'\n\t'".join(missing_fields) + "'") # add the elements to the approved header items dict for key, val in self._all_items.items(): @@ -291,22 +314,26 @@ def fields_exist(self, expected): continue self.items[key] = val + def fields_have_values(self, required): """ Check all fields that should have values do for this type+version. These are detailed in the header.required element of the json file. """ for item in required: - if not self.items[item]: # empty sequences are false, don't use "len() == 0" + if not self.items[item]: raise ValidationError("Header item '%s' has no value." % (item)) + def field_values_valid(self, validate): """ Checks all restricted fields have valid values for this type+version. 
""" for item in validate: if self.items[item] not in validate[item]: - raise ValidationError("Header item '%s' has an invalid value of: %s" % (item, self.items[item])) + raise ValidationError("Header item '%s' has an invalid value \ + of: %s" % (item, self.items[item])) + def validate(self, rules): """ @@ -326,9 +353,11 @@ def validate(self, rules): else: uuid_found = self.items['Our Ref:'] if not uuid4_chk(uuid_found): - raise ValidationError("Value found at 'Our Ref' is not a valid uuid4: "+uuid_found) + raise ValidationError("Value found at 'Our Ref' is not a valid \ + uuid4: "+uuid_found) self.uuid = uuid_found + class Body(object): """ Body object validates the individual records of a manifest. @@ -337,7 +366,7 @@ class Body(object): """ def __init__(self, manifest, config): self.manifest = manifest - self.offset = 1 # start at one as would need to increment for header line otherwise + self.offset = 1 # start at one otherwise need to increment for header manifest_dir = os.path.dirname(manifest) csv = import_module('csv') self.file_detail = [] @@ -352,7 +381,10 @@ def __init__(self, manifest, config): if not loadRows: self.offset += 1 continue - self.file_detail.append(FileMeta(self.headings, row, manifest_dir)) + self.file_detail.append(FileMeta(self.headings, + row, + manifest_dir)) + def write(self, fp, config): """ @@ -372,6 +404,7 @@ def write(self, fp, config): print("\t".join(row), file=fp) return for_json + def validate(self, rules): """ Runs the different elements of body validation: @@ -383,12 +416,13 @@ def validate(self, rules): self.uniq_files() self.file_ext_check(rules['validate_ext']) + def field_values_valid(self, validate): """ Check fields with restriced dict are valid Must run after self.fields_have_values() - If 'limit' and 'limit_by' are defined will create a counter for each of these entities - and error if 'limit' exceeded + If 'limit' and 'limit_by' are defined will create a counter for each of + these entities and error if 'limit' exceeded """ for field, chk in validate.items(): cnt = self.offset @@ -397,7 +431,8 @@ def field_values_valid(self, validate): cnt += 1 # checks all values are valid if fd.attributes[field] not in [d['value'] for d in chk]: - raise ValidationError("Metadata item '%s' has an invalid value of '%s' on line %d" + raise ValidationError("Metadata item '%s' has an invalid \ + value of '%s' on line %d" % (field, fd.attributes[field], cnt)) # Construct value occurence limiting counts for val_limit in chk: @@ -419,6 +454,7 @@ def field_values_valid(self, validate): limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1 evaulate_value_limits(field, chk, limit_chks) + def fields_have_values(self, rules): """ Check the fields listed as required are populated @@ -428,9 +464,11 @@ def fields_have_values(self, rules): cnt += 1 for req in rules: if (not fd.attributes[req]) or fd.attributes[req] == '.': - raise ValidationError("Required metadata value absent for '%s' on line %d ('.' not acceptable)" + raise ValidationError("Required metadata value absent for \ + '%s' on line %d ('.' 
not acceptable)" % (req, cnt)) + def uniq_files(self): """ Check all filenames are uniq within this manifest @@ -444,10 +482,12 @@ def uniq_files(self): if item == '.': continue if item in all_files: - raise ValidationError("Metadata item '%s' has a duplicate value of '%s' on line %d" + raise ValidationError("Metadata item '%s' has a duplicate \ + value of '%s' on line %d" % (f_type, item, cnt)) all_files.append(item) + def file_ext_check(self, rules): """ Check all files have valid extentions @@ -469,14 +509,17 @@ def file_ext_check(self, rules): full_ext = ext + extra if full_ext not in rules[f_type]: - raise ValidationError("File extension of '%s' is not valid, '%s' on line %d" + raise ValidationError("File extension of '%s' is not valid, \ + '%s' on line %d" % (full_ext, f_type, cnt)) if last_ext is not None and last_ext != full_ext: - raise ValidationError("File extensions for same row must match, '%s' vs '%s' on line %d" + raise ValidationError("File extensions for same row must \ + match, '%s' vs '%s' on line %d" % (last_ext, full_ext, cnt)) last_ext = full_ext + def heading_check(self, config): """ Simple check for correct, ordered headings for file rows. @@ -488,6 +531,7 @@ def heading_check(self, config): + "\nbut got\n\t" + ', '.join(self.headings)) + def file_tests(self): """ Test for file existance and content diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 3074378..88d8cb2 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -16,6 +16,7 @@ prog_records = 100000 + class SeqValidator(object): """ Validate sequence file, currently only does fastq (interleaved or paired) @@ -32,7 +33,7 @@ def __init__(self, file_a, file_b=None, progress_pairs=prog_records): self.file_b = file_b self.pairs = 0 # will use this to decide on path - self.is_gzip = False # change open method for fastq + self.is_gzip = False # change open method for fastq # sam is not supported # only the min value is actually needed to determine if scaling @@ -62,7 +63,7 @@ def _prep(self): full_ext = ext + full_ext if self.file_b is None: - self.file_b = self.file_a # use equality to indicate interleaved + self.file_b = self.file_a # use equality to indicate interleaved elif not self.file_b.endswith(full_ext): raise SeqValidationError("Input files be of same type") @@ -87,8 +88,7 @@ def report(self, fp): """ report = {'pairs': self.pairs, 'valid_q': self.q_min == 33, - 'interleaved': self.file_a == self.file_b - } + 'interleaved': self.file_a == self.file_b} json.dump(report, fp, sort_keys=True, indent=4) def validate_paired(self): diff --git a/setup.py b/setup.py index f24100a..fc2874b 100755 --- a/setup.py +++ b/setup.py @@ -12,10 +12,11 @@ 'version': '1.1.0', 'python_requires': '>= 3.3', 'setup_requires': ['nose>=1.0'], - 'install_requires': ['progressbar2','xlrd'], + 'install_requires': ['progressbar2', 'xlrd'], 'packages': ['cgp_seq_input_val'], 'package_data': {'cgp_seq_input_val': ['config/*.json']}, - 'scripts': ['bin/normalise_manifest.py', 'bin/validate_manifest.py', 'bin/validate_seq_file.py'] + 'scripts': ['bin/normalise_manifest.py', 'bin/validate_manifest.py', + 'bin/validate_seq_file.py'] } setup(**config) From 866206257de489c698b43fdb0dc098fbfb6becdc Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:08:52 +0100 Subject: [PATCH 09/37] Fix line length --- .codeclimate.yml | 1 - tox.ini | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tox.ini diff --git a/.codeclimate.yml 
b/.codeclimate.yml index 70efdda..91aab99 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -11,7 +11,6 @@ engines: enabled: true pep8: enabled: true - max-line-length: 120 radon: enabled: true diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..4879cb6 --- /dev/null +++ b/tox.ini @@ -0,0 +1,2 @@ +[pep8] +max-line-length = 160 From d9ffaa01149c4e8420964dd7ed484c4e7ff770db Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:16:42 +0100 Subject: [PATCH 10/37] More style correctionse --- bin/normalise_manifest.py | 2 +- bin/validate_manifest.py | 5 +++-- bin/validate_seq_file.py | 2 +- cgp_seq_input_val/file_meta.py | 1 + cgp_seq_input_val/manifest.py | 31 +++++++++--------------------- cgp_seq_input_val/seq_validator.py | 7 +++---- 6 files changed, 18 insertions(+), 30 deletions(-) diff --git a/bin/normalise_manifest.py b/bin/normalise_manifest.py index d232cf1..9033fca 100755 --- a/bin/normalise_manifest.py +++ b/bin/normalise_manifest.py @@ -12,7 +12,7 @@ version = pkg_resources.require("cgp_seq_input_val")[0].version -## read variables, auto help text +# read variables, auto help text parser = argparse.ArgumentParser(description='Convert manifest files to common denominator (tsv)', epilog='Input can be [xls|xlsx|csv|tsv]. "tsv" is just copied to maintain tool-chain') parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) diff --git a/bin/validate_manifest.py b/bin/validate_manifest.py index 85f40c8..a753d58 100755 --- a/bin/validate_manifest.py +++ b/bin/validate_manifest.py @@ -14,7 +14,7 @@ version = pkg_resources.require("cgp_seq_input_val")[0].version -## read variables, auto help text +# read variables, auto help text parser = argparse.ArgumentParser(description='Validate a tsv import manifest file') parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) parser.add_argument('-i', '--input', dest='input', metavar='FILE', @@ -30,7 +30,8 @@ try: manifest = Manifest(args.input) manifest.validate() - (tsv_file, json_file) = manifest.write(args.output) # output new manifest in tsv and json. + # output new manifest in tsv and json. + (tsv_file, json_file) = manifest.write(args.output) print("Created files:\n\t%s\n\t%s" % (tsv_file, json_file)) except ValidationError as ve: print("ERROR: " + str(ve), file=sys.stderr) diff --git a/bin/validate_seq_file.py b/bin/validate_seq_file.py index f8b8964..65924b5 100755 --- a/bin/validate_seq_file.py +++ b/bin/validate_seq_file.py @@ -35,7 +35,7 @@ validator = SeqValidator(args.input[0], file_2) validator.validate() validator.report(args.report) -except SeqValidationError as ve: # runtime so no functions for message and errno +except SeqValidationError as ve: # runtime so no functions for message and errno print("ERROR: " + str(ve), file=sys.stderr) exit(1) # have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError diff --git a/cgp_seq_input_val/file_meta.py b/cgp_seq_input_val/file_meta.py index bf8a7b2..d9456e9 100644 --- a/cgp_seq_input_val/file_meta.py +++ b/cgp_seq_input_val/file_meta.py @@ -53,6 +53,7 @@ def test_files(self, line): raise FileValidationError("'%s' is an empty file ('%s' - line %d)." % (item, f_type, line)) + class FileValidationError(RuntimeError): """ Exception for failures to validate data in the manifest. 
diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py index 369ea3b..263974f 100644 --- a/cgp_seq_input_val/manifest.py +++ b/cgp_seq_input_val/manifest.py @@ -139,7 +139,6 @@ def convert_by_extn(self, outfile): convertor = getattr(self, '_' + self.informat + '_to_tsv') convertor(ofh) - def validate(self, checkFiles=False): """ Runs the actual validation of a manifest: @@ -288,7 +287,6 @@ def validate_json(self, config): raise ConfigError("body (dict/hash) not found in json file: \ %s-%s.json" % (self.type, self.version)) - def fields_exist(self, expected): """ Checks all field that are expected to exist in the header of this @@ -300,21 +298,20 @@ def fields_exist(self, expected): expected_fields = set(expected) unexpected = found.difference(expected_fields) if unexpected: + joined_vars = "'\n\t'".join(unexpected) raise ValidationError("The following unexpected fields were found \ - in the header of your file:\n\t'" - + "'\n\t'".join(unexpected) + "'") + in the header of your file:\n\t'" + joined_vars + "'") missing_fields = expected_fields.difference(found) if missing_fields: + joined_vars = "'\n\t'".join(missing_fields) raise ValidationError("The following expected fields were missing \ - from the header of your file:\n\t'" - + "'\n\t'".join(missing_fields) + "'") + from the header of your file:\n\t'" + joined_vars + "'") # add the elements to the approved header items dict for key, val in self._all_items.items(): if key in ('Form type:', 'Form version:'): continue self.items[key] = val - def fields_have_values(self, required): """ Check all fields that should have values do for this type+version. @@ -324,7 +321,6 @@ def fields_have_values(self, required): if not self.items[item]: raise ValidationError("Header item '%s' has no value." % (item)) - def field_values_valid(self, validate): """ Checks all restricted fields have valid values for this type+version. @@ -334,7 +330,6 @@ def field_values_valid(self, validate): raise ValidationError("Header item '%s' has an invalid value \ of: %s" % (item, self.items[item])) - def validate(self, rules): """ Runs the different elements of header validation: @@ -366,7 +361,7 @@ class Body(object): """ def __init__(self, manifest, config): self.manifest = manifest - self.offset = 1 # start at one otherwise need to increment for header + self.offset = 1 # start at one otherwise need to increment for header manifest_dir = os.path.dirname(manifest) csv = import_module('csv') self.file_detail = [] @@ -385,7 +380,6 @@ def __init__(self, manifest, config): row, manifest_dir)) - def write(self, fp, config): """ Writes the body to a file-pointer in tsv and returns the values @@ -404,7 +398,6 @@ def write(self, fp, config): print("\t".join(row), file=fp) return for_json - def validate(self, rules): """ Runs the different elements of body validation: @@ -416,7 +409,6 @@ def validate(self, rules): self.uniq_files() self.file_ext_check(rules['validate_ext']) - def field_values_valid(self, validate): """ Check fields with restriced dict are valid @@ -454,7 +446,6 @@ def field_values_valid(self, validate): limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1 evaulate_value_limits(field, chk, limit_chks) - def fields_have_values(self, rules): """ Check the fields listed as required are populated @@ -468,7 +459,6 @@ def fields_have_values(self, rules): '%s' on line %d ('.' 
not acceptable)" % (req, cnt)) - def uniq_files(self): """ Check all filenames are uniq within this manifest @@ -487,7 +477,6 @@ def uniq_files(self): % (f_type, item, cnt)) all_files.append(item) - def file_ext_check(self, rules): """ Check all files have valid extentions @@ -519,18 +508,16 @@ def file_ext_check(self, rules): % (last_ext, full_ext, cnt)) last_ext = full_ext - def heading_check(self, config): """ Simple check for correct, ordered headings for file rows. Here to minimise complexity of init """ if self.headings != config['ordered']: - raise ValidationError("Expected row headings of\n\t" - + ', '.join(config['ordered']) - + "\nbut got\n\t" - + ', '.join(self.headings)) - + raise ValidationError("Expected row headings of\n\t" + + ', '.join(config['ordered']) + + "\nbut got\n\t" + + ', '.join(self.headings)) def file_tests(self): """ diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 88d8cb2..84c479e 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -137,12 +137,12 @@ def validate_paired(self): if curr_line_a == '': if curr_line_b != '': raise SeqValidationError("Read 1 file finished before read 2") - break # if we get here both files are finished + break # if we get here both files are finished if curr_line_b == '': raise SeqValidationError("Read 2 file finished before read 1") self.pairs = pairs finally: - print(file=sys.stderr) # make sure we move to next line when progress finishes + print(file=sys.stderr) # make sure we move to next line when progress finishes if fq_fh_a is not None and not fq_fh_a.closed: fq_fh_a.close() if fq_fh_b is not None and not fq_fh_b.closed: @@ -212,8 +212,7 @@ def check_pair(self, read_1, read_2): \n\t%s (%s)\n\t%s (%s)" % (read_1.file_pos[0], read_2.file_pos[0], read_1.name, self.file_a, - read_2.name, self.file_b) - ) + read_2.name, self.file_b)) if read_1.end != '1': raise SeqValidationError("Fastq record at line %d of %s should be for first in pair, got '%s'" % (read_1.file_pos[0], self.file_a, read_1.end)) From 53ad6e03d8f5e7ef578069a67fd634838c97a121 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:18:11 +0100 Subject: [PATCH 11/37] Last style error --- cgp_seq_input_val/seq_validator.py | 2 +- tox.ini | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 84c479e..9d0b62a 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -189,7 +189,7 @@ def validate_interleaved(self): break self.pairs = pairs finally: - print(file=sys.stderr) # make sure we move to next line when progress finishes + print(file=sys.stderr) # make sure we move to next line when progress finishes if fq_fh is not None and not fq_fh.closed: fq_fh.close() diff --git a/tox.ini b/tox.ini index 4879cb6..d8b4df9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,2 +1,5 @@ [pep8] max-line-length = 160 + +[pycodestyle] +max-line-length = 160 From 7d4cb05458640cc86791481a84e78217642a5166 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:28:40 +0100 Subject: [PATCH 12/37] Should add code coverage push to codeclimae as part of travis build --- .travis.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.travis.yml b/.travis.yml index dac145b..82d3d7d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,12 @@ notifications: slack: wtsi-cgpit:ptUMR1tkNyZJYd9TpGoss8WR email: false +env: + global: + - 
CC_TEST_REPORTER_ID=24af7ff1e88f3b70c8b9a5280ce9604d561dacd8eaa7b1d895128ca2bd724beb
+
 language: python
+
 python:
   - "3.6"
 
@@ -11,5 +16,14 @@ install:
   - pip install progressbar2
   - pip install xlrd
 
+
+before_script:
+  - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
+  - chmod +x ./cc-test-reporter
+  - ./cc-test-reporter before-build
+
 script:
   - nosetests --with-coverage --cover-erase --cover-package=cgp_seq_input_val
+
+after_script:
+  - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT

From 0878d7724e7adf10c9430aea64ec03f670549229 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 12:37:50 +0100
Subject: [PATCH 13/37] don't delete the coverage file

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 82d3d7d..4309ccc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,7 +23,7 @@ before_script:
   - ./cc-test-reporter before-build
 
 script:
-  - nosetests --with-coverage --cover-erase --cover-package=cgp_seq_input_val
+  - nosetests --with-coverage --cover-package=cgp_seq_input_val

From 9b50e80fdd3b12c091039d7afd0fd67a7b08ee7a Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 12:47:20 +0100
Subject: [PATCH 14/37] convert raw coverage output to compatible form

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 4309ccc..1989da4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,6 +24,7 @@ before_script:
 
 script:
   - nosetests --with-coverage --cover-package=cgp_seq_input_val
+  - coverage xml
 
 after_script:
   - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT

From 32acd50a21d751a8940f5b09018c4c12166f251d Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 14:42:54 +0100
Subject: [PATCH 15/37] #Fix 3

---
 .codeclimate.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 91aab99..11857ae 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -23,3 +23,4 @@ exclude_paths:
   - "git-hooks/"
   - "data/"
   - "tests/"
+  - "pylintrc"

From 9d60702b9bab468fa817cd71546919dcf643e399 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 14:43:20 +0100
Subject: [PATCH 16/37] raise mass threshold

---
 .codeclimate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 11857ae..7207567 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -3,8 +3,8 @@ engines:
     enabled: true
     config:
       languages:
-      - python
-      #mass_threshold: 30
+        python:
+          mass_threshold: 35
   fixme:
     enabled: true

From 5d14661ab828d46286d5edda310d63f4b19da246 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Fri, 29 Sep 2017 10:32:43 +0100
Subject: [PATCH 17/37] More local checks and changes to codeclimate config

---
 .codeclimate.yml | 3 +++
 run_tests.sh | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 7207567..06b5b21 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -4,6 +4,7 @@ engines:
     config:
       languages:
         python:
+          python_version: 3
          mass_threshold: 35
   fixme:
     enabled: true
@@ -13,6 +14,8 @@ engines:
     enabled: true
   radon:
     enabled: true
+    config:
+      threshold: "C"
 
 ratings:
   paths:
diff --git a/run_tests.sh b/run_tests.sh
index 4cf70d2..49a7eec 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -6,7 +6,17 @@ if [ "$code" != "0" ]; then
   exit $code
 fi
 
-echo -e "\n#################\n# Running pylint:\n"
+# these should not die:
+
+echo -e "\n###################################"
+echo "# Running radon (code complexity) #"
+echo "###################################"
+env/bin/radon cc -nc bin cgp_seq_input_val
+
+echo -e "\n##########################"
+echo "# Running pylint (style) #"
+echo "##########################"
 env/bin/pylint --output-format=colorized bin/*.py cgp_seq_input_val
-echo -e "#\n#################"
-exit 0 # don't die based on pylint
+
+
+exit 0 # don't die based on assements of code quality

From 7bc7af5edf09a3cb74d397089827b3f602d5c97c Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Mon, 9 Oct 2017 16:35:20 +0100
Subject: [PATCH 18/37] Add missing batch of tests and mark some code as not possible to cover (with reason)

---
 cgp_seq_input_val/cliutil.py | 10 ++++++++--
 tests/cgp_seq_input_val_tests_cliutil.py | 22 ++++++++++++++++++++++
 tests/data/cliutil/good.csv | 0
 tests/data/cliutil/good.tsv | 0
 tests/data/cliutil/good.xls | 0
 tests/data/cliutil/good.xlsx | 0
 6 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 tests/cgp_seq_input_val_tests_cliutil.py
 create mode 100644 tests/data/cliutil/good.csv
 create mode 100644 tests/data/cliutil/good.tsv
 create mode 100644 tests/data/cliutil/good.xls
 create mode 100644 tests/data/cliutil/good.xlsx

diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py
index f82f1d7..a69fa28 100644
--- a/cgp_seq_input_val/cliutil.py
+++ b/cgp_seq_input_val/cliutil.py
@@ -13,8 +13,14 @@ def extn_check(parser, choices, fname, readable=False):
     try:
         handle = open(fname, 'r')
         handle.close()
-    except FileNotFoundError as error:
+    except FileNotFoundError as error:  # pragma: no cover
         parser.error(error)
-    if extn not in choices:
+    if extn not in choices:  # pragma: no cover
         parser.error("File doesn't end with {}".format(choices))
     return fname
+
+"""
+Why 'pragma: no cover'
+to cover parser errors in test cases you have to add a fair amount of additional
+code, as we know that raising an error this way is robust consider this covered.
+""" diff --git a/tests/cgp_seq_input_val_tests_cliutil.py b/tests/cgp_seq_input_val_tests_cliutil.py new file mode 100644 index 0000000..d317eb4 --- /dev/null +++ b/tests/cgp_seq_input_val_tests_cliutil.py @@ -0,0 +1,22 @@ +from nose.tools import * +import os, sys, tempfile +import glob +#from argparse import Namespace + +from cgp_seq_input_val.cliutil import extn_check +from cgp_seq_input_val import constants + +import argparse + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cliutil') + +def setup(): + pass + +def teardown(): + pass + +def test_extn_check_good(): + parser = argparse.ArgumentParser() + for f in glob.glob(os.path.join(test_dir, 'good.*')): + extn_check(parser, constants.MANIFEST_EXTNS, f, readable=True) diff --git a/tests/data/cliutil/good.csv b/tests/data/cliutil/good.csv new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/cliutil/good.tsv b/tests/data/cliutil/good.tsv new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/cliutil/good.xls b/tests/data/cliutil/good.xls new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/cliutil/good.xlsx b/tests/data/cliutil/good.xlsx new file mode 100644 index 0000000..e69de29 From dad0ff5449988f602df3c609d7f00df19484cfde Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 9 Oct 2017 16:37:36 +0100 Subject: [PATCH 19/37] Doc use of radon and fix comment --- README.md | 1 + cgp_seq_input_val/cliutil.py | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 10b3375..8532e2d 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ For testing/coverage (`./run_tests.sh`) ``` env/bin/pip install nose +env/bin/pip install radon env/bin/pip install coverage env/bin/pip install pylint ``` diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py index a69fa28..e417a66 100644 --- a/cgp_seq_input_val/cliutil.py +++ b/cgp_seq_input_val/cliutil.py @@ -14,13 +14,9 @@ def extn_check(parser, choices, fname, readable=False): handle = open(fname, 'r') handle.close() except FileNotFoundError as error: # pragma: no cover + # can't cover these easily parser.error(error) if extn not in choices: # pragma: no cover + # can't cover these easily parser.error("File doesn't end with {}".format(choices)) return fname - -""" -Why 'pragma: no cover' -to cover parser errors in test cases you have to add a fair amount of additional -code, as we know that raising an error this way is robust consider this covered. 
-""" From a0e897103a59d17e86e24b0892215032d3e90fdc Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 10 Oct 2017 09:03:51 +0100 Subject: [PATCH 20/37] Change mass required for duplication check, as picking up exception raising --- .codeclimate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codeclimate.yml b/.codeclimate.yml index 06b5b21..656e321 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -5,7 +5,7 @@ engines: languages: python: python_version: 3 - mass_threshold: 35 + mass_threshold: 40 fixme: enabled: true markdownlint: From 2de1e4e80e947440ca93df6c42913ccbb59691be Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 10 Oct 2017 09:24:41 +0100 Subject: [PATCH 21/37] Cleanup lint and push duplication mass higher, may turn it off --- .codeclimate.yml | 2 +- cgp_seq_input_val/cliutil.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.codeclimate.yml b/.codeclimate.yml index 656e321..9fdff4c 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -5,7 +5,7 @@ engines: languages: python: python_version: 3 - mass_threshold: 40 + mass_threshold: 50 fixme: enabled: true markdownlint: diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py index e417a66..3b94747 100644 --- a/cgp_seq_input_val/cliutil.py +++ b/cgp_seq_input_val/cliutil.py @@ -13,10 +13,10 @@ def extn_check(parser, choices, fname, readable=False): try: handle = open(fname, 'r') handle.close() - except FileNotFoundError as error: # pragma: no cover + except FileNotFoundError as error: # pragma: no cover # can't cover these easily parser.error(error) - if extn not in choices: # pragma: no cover + if extn not in choices: # pragma: no cover # can't cover these easily parser.error("File doesn't end with {}".format(choices)) return fname From 93ec7df1c1ffd02cf8a16db64180fa0e45ff2f37 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 10 Oct 2017 13:50:02 +0100 Subject: [PATCH 22/37] More ways to keep on top of code --- run_tests.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 49a7eec..6f15bec 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,22 +1,23 @@ #!/usr/bin/env bash -env/bin/nosetests --with-coverage --cover-erase --cover-html --cover-package=cgp_seq_input_val -code=$? 
- -if [ "$code" != "0" ]; then - exit $code -fi +set -e +env/bin/nosetests --with-coverage --cover-erase --cover-html --cover-min-percentage=50 --cover-package=cgp_seq_input_val +set +e # these should not die: -echo -e "\n###################################" -echo "# Running radon (code complexity) #" -echo "###################################" -env/bin/radon cc -nc bin cgp_seq_input_val - echo -e "\n##########################" echo "# Running pylint (style) #" echo "##########################" env/bin/pylint --output-format=colorized bin/*.py cgp_seq_input_val +echo -e "\n#########################################" +echo "# Running radon (cyclomatic complexity) #" +echo "#########################################" +env/bin/radon cc -nc bin cgp_seq_input_val + +echo -e "\n#########################################" +echo "# Running radon (maintainability index) #" +echo "#########################################" +env/bin/radon mi -s -n B bin cgp_seq_input_val exit 0 # don't die based on assements of code quality From c2b91a480dafe7140bbb562c9dfc700874bd231c Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 11:06:29 +0100 Subject: [PATCH 23/37] Reworked to use pytest and entry-points to do away with physical scripts --- .gitignore | 3 +- README.md | 34 ++- bin/normalise_manifest.py | 27 --- bin/validate_manifest.py | 38 --- bin/validate_seq_file.py | 49 ---- cgp_seq_input_val/cliutil.py | 4 +- cgp_seq_input_val/command_line.py | 60 +++++ cgp_seq_input_val/manifest.py | 12 + cgp_seq_input_val/seq_validator.py | 19 ++ run_tests.sh | 8 +- setup.py | 7 +- tests/cgp_seq_input_val_tests_cliutil.py | 22 -- tests/cgp_seq_input_val_tests_fastq_read.py | 47 ---- tests/cgp_seq_input_val_tests_manifest.py | 228 ------------------ .../cgp_seq_input_val_tests_seq_validator.py | 83 ------- tests/data/cliutil/bad.extn | 0 tests/test_cgp_seq_input_val_cliutil.py | 32 +++ tests/test_cgp_seq_input_val_fastq_read.py | 51 ++++ ...py => test_cgp_seq_input_val_file_meta.py} | 28 +-- tests/test_cgp_seq_input_val_manifest.py | 228 ++++++++++++++++++ ...py => test_cgp_seq_input_val_normalise.py} | 5 +- tests/test_cgp_seq_input_val_seq_validator.py | 83 +++++++ 22 files changed, 534 insertions(+), 534 deletions(-) delete mode 100755 bin/normalise_manifest.py delete mode 100755 bin/validate_manifest.py delete mode 100755 bin/validate_seq_file.py create mode 100644 cgp_seq_input_val/command_line.py delete mode 100644 tests/cgp_seq_input_val_tests_cliutil.py delete mode 100644 tests/cgp_seq_input_val_tests_fastq_read.py delete mode 100644 tests/cgp_seq_input_val_tests_manifest.py delete mode 100644 tests/cgp_seq_input_val_tests_seq_validator.py create mode 100644 tests/data/cliutil/bad.extn create mode 100644 tests/test_cgp_seq_input_val_cliutil.py create mode 100644 tests/test_cgp_seq_input_val_fastq_read.py rename tests/{cgp_seq_input_val_tests_file_meta.py => test_cgp_seq_input_val_file_meta.py} (55%) create mode 100644 tests/test_cgp_seq_input_val_manifest.py rename tests/{cgp_seq_input_val_tests_normalise.py => test_cgp_seq_input_val_normalise.py} (96%) create mode 100644 tests/test_cgp_seq_input_val_seq_validator.py diff --git a/.gitignore b/.gitignore index d100e53..97b2970 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,8 @@ __pycache__/ *.py[cod] *$py.class .coverage -cover/ +.cache +htmlcov/ /.eggs /build /dist diff --git a/README.md b/README.md index 8532e2d..d3d1257 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,9 @@ the driver code. 
## Tools -### normalise_manifest.py +`cgpSeqInputVal` has multiple sub commands, listed with `cgpSeqInputVal --help`. + +### cgpSeqInputVal man-norm Takes input in multiple types and converts to tsv. If intput is tsv just copied the file to the output location (to simplify usage in workflows). Valid input types @@ -24,7 +26,7 @@ include: Absolutely no validation is carried out here. -### validate_manifest.py +### cgpSeqInputVal man-valid Takes the `tsv` representation of a manifest and performs validation of the structure and data values. The checks applied are managed by the `cgp_seq_input_val/config/*.json` @@ -37,7 +39,7 @@ The output is a lightly modified version of the input, adding: And a `json` version of the file ready for use by downstream systems. -### validate_seq_file.py +### cgpSeqInputVal seq-valid Takes an interleaved or a pair of paired-fastq files and produces a simple report of: @@ -108,17 +110,20 @@ in the base of the clone: cd $PROJECTROOT hash virtualenv || pip3 install virtualenv virtualenv -p python3 env -env/bin/pip install progressbar2 -env/bin/pip install xlrd +source env/bin/activate +pip install progressbar2 +pip install xlrd +python setup.py develop # so bin scripts can find module ``` For testing/coverage (`./run_tests.sh`) ``` -env/bin/pip install nose -env/bin/pip install radon -env/bin/pip install coverage -env/bin/pip install pylint +source env/bin/activate # if not already in env +pip install pytest +pip install pytest-cov +pip install pylint +pip install radon ``` __Also see__ [Package Dependancies](#package-dependancies) @@ -127,10 +132,13 @@ __Also see__ [Package Dependancies](#package-dependancies) __Make sure the version is incremented__ in `./setup.py` -The release is handled by setuptools: +The release is handled by wheel: ```bash -$ ./setup.py bdist_egg -# this creates an egg which can be copied to a deployment location, e.g. -scp dist/cgp_seq_input_val-0.1.0-py3.6.egg user@host:~/ +$ source env/bin/activate # if not already +$ python setup.py bdist_wheel -d dist +# this creates an wheel archive which can be copied to a deployment location, e.g. +$ scp cgp_seq_input_val-1.1.0-py3-none-any.whl user@host:~/wheels +# on host +$ pip install --find-links=~/wheels cgp_seq_input_val ``` diff --git a/bin/normalise_manifest.py b/bin/normalise_manifest.py deleted file mode 100755 index 9033fca..0000000 --- a/bin/normalise_manifest.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 - -"""Converts manifests from various types to TSV""" - -# python builtin -import argparse -import pkg_resources # part of setuptools - -# this project -from cgp_seq_input_val import constants, cliutil -from cgp_seq_input_val.manifest import normalise - -version = pkg_resources.require("cgp_seq_input_val")[0].version - -# read variables, auto help text -parser = argparse.ArgumentParser(description='Convert manifest files to common denominator (tsv)', - epilog='Input can be [xls|xlsx|csv|tsv]. "tsv" is just copied to maintain tool-chain') -parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) -parser.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in friendly formats', required=True, - type=lambda s: cliutil.extn_check(parser, constants.MANIFEST_EXTNS, s, readable=True)) -parser.add_argument('-o', '--output', dest='output', metavar='FILE', - help='Output file *.tsv [default: sub. 
extension]', required=False, - type=lambda s: cliutil.extn_check(parser, ('tsv'), s)) -args = parser.parse_args() - -normalise(args) diff --git a/bin/validate_manifest.py b/bin/validate_manifest.py deleted file mode 100755 index a753d58..0000000 --- a/bin/validate_manifest.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 - -"""Validates the normalised TSV manifest files""" - -# python builtin -import sys -import argparse -import pkg_resources # part of setuptools - -# this project -from cgp_seq_input_val import cliutil -from cgp_seq_input_val.manifest import Manifest -from cgp_seq_input_val.manifest import ValidationError - -version = pkg_resources.require("cgp_seq_input_val")[0].version - -# read variables, auto help text -parser = argparse.ArgumentParser(description='Validate a tsv import manifest file') -parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) -parser.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in tsv formats', required=True, - type=lambda s: cliutil.extn_check(parser, ('tsv'), s, readable=True)) -parser.add_argument('-o', '--output', dest='output', metavar='DIR', - help='Output manifest to this area, two files (tsv, json)', required=True) -parser.add_argument('-c', '--checkfiles', dest='checkfiles', action='store_true', - help='When present check file exist and are non-zero size') - -args = parser.parse_args() - -try: - manifest = Manifest(args.input) - manifest.validate() - # output new manifest in tsv and json. - (tsv_file, json_file) = manifest.write(args.output) - print("Created files:\n\t%s\n\t%s" % (tsv_file, json_file)) -except ValidationError as ve: - print("ERROR: " + str(ve), file=sys.stderr) - exit(1) diff --git a/bin/validate_seq_file.py b/bin/validate_seq_file.py deleted file mode 100755 index 65924b5..0000000 --- a/bin/validate_seq_file.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 - -""" -Validates up to 2 sequencing data files -When 2 found these should be paired fastq[.gz], -otherwise expecting interleaved fastq. - -May be extended to cover BAM/CRAM at a later date. 
-"""
-
-# python builtin
-import sys
-import argparse
-import pkg_resources  # part of setuptools
-
-# this project
-from cgp_seq_input_val.seq_validator import SeqValidator
-from cgp_seq_input_val.seq_validator import SeqValidationError
-
-version = pkg_resources.require("cgp_seq_input_val")[0].version
-
-parser = argparse.ArgumentParser(description="""Validates up to 2 sequencing data files.""")
-parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
-parser.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), default='-',
-                    help='Output json report', required=False)
-parser.add_argument('-i', '--input', dest='input', metavar='FILE', nargs='+',
-                    help='Input manifest in tsv formats', required=True)
-
-args = parser.parse_args()
-
-try:
-    file_2 = None
-    if len(args.input) == 2:
-        file_2 = args.input[1]
-    validator = SeqValidator(args.input[0], file_2)
-    validator.validate()
-    validator.report(args.report)
-except SeqValidationError as ve: # runtime so no functions for message and errno
-    print("ERROR: " + str(ve), file=sys.stderr)
-    exit(1)
-# have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError
-except (OSError, IOError) as err:
-    print("ERROR: %s - %s" % (err.strerror, err.filename), file=sys.stderr)
-    exit(err.errno)
-
-# Interleaved fastq to paired:
-# gnu-sed needed
-# zcat 242215_i.fq.gz | gsed -n '1~8,+3p' | gzip -c > 242215_1.fq.gz
-# zcat 242215_i.fq.gz | gsed -n '5~8,+3p' | gzip -c > 242215_2.fq.gz
diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py
index 3b94747..25c821c 100644
--- a/cgp_seq_input_val/cliutil.py
+++ b/cgp_seq_input_val/cliutil.py
@@ -13,10 +13,10 @@ def extn_check(parser, choices, fname, readable=False):
     try:
         handle = open(fname, 'r')
         handle.close()
-    except FileNotFoundError as error: # pragma: no cover
+    except FileNotFoundError as error: # can't cover these easily
        parser.error(error)
 
-    if extn not in choices: # pragma: no cover
+    if extn not in choices: # can't cover these easily
         parser.error("File doesn't end with {}".format(choices))
 
     return fname
diff --git a/cgp_seq_input_val/command_line.py b/cgp_seq_input_val/command_line.py
new file mode 100644
index 0000000..7cd6d2d
--- /dev/null
+++ b/cgp_seq_input_val/command_line.py
@@ -0,0 +1,60 @@
+"""
+Handle the command line parsing and select the correct sub process.
+"""
+
+import argparse
+import sys
+import pkg_resources  # part of setuptools
+
+from cgp_seq_input_val import constants, cliutil
+from cgp_seq_input_val.manifest import normalise
+from cgp_seq_input_val.manifest import wrapped_validate
+from cgp_seq_input_val.seq_validator import validate_seq_files
+version = pkg_resources.require("cgp_seq_input_val")[0].version
+
+def main():
+    """
+    Sets up the parser and handles triggereing of correct sub-command
+    """
+    parser = argparse.ArgumentParser(prog='cgpSeqInputVal')
+    subparsers = parser.add_subparsers(help='sub-command help')
+
+    # create the parser for the "man-norm" command
+    parser_a = subparsers.add_parser('man-norm', description='Convert manifest files to common denominator (tsv)',
+                                     epilog='Input can be [xls|xlsx|csv|tsv]. \
+                                             "tsv" is just copied to maintain tool-chain')
+    parser_a.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
+    parser_a.add_argument('-i', '--input', dest='input', metavar='FILE',
+                          help='Input manifest in friendly formats', required=True,
+                          type=lambda s: cliutil.extn_check(parser, constants.MANIFEST_EXTNS, s, readable=True))
+    parser_a.add_argument('-o', '--output', dest='output', metavar='FILE',
+                          help='Output file *.tsv [default: sub. extension]', required=False,
+                          type=lambda s: cliutil.extn_check(parser, ('tsv'), s))
+    parser_a.set_defaults(func=normalise)
+
+    # create the parser for the "man-valid" command
+    parser_b = subparsers.add_parser('man-valid', description='Validate a tsv import manifest file')
+    parser_b.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
+    parser_b.add_argument('-i', '--input', dest='input', metavar='FILE',
+                          help='Input manifest in tsv formats', required=True,
+                          type=lambda s: cliutil.extn_check(parser, ('tsv'), s, readable=True))
+    parser_b.add_argument('-o', '--output', dest='output', metavar='DIR',
+                          help='Output manifest to this area, two files (tsv, json)', required=True)
+    parser_b.add_argument('-c', '--checkfiles', dest='checkfiles', action='store_true',
+                          help='When present check file exist and are non-zero size')
+    parser_b.set_defaults(func=wrapped_validate)
+
+    # create the parser for the "seq-valid" command
+    parser_c = subparsers.add_parser('seq-valid', description='Validates up to 2 sequencing data files.')
+    parser_c.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
+    parser_c.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), default='-',
+                          help='Output json report', required=False)
+    parser_c.add_argument('-i', '--input', dest='input', metavar='FILE', nargs='+',
+                          help='Input manifest in tsv formats', required=True)
+    parser_c.set_defaults(func=validate_seq_files)
+
+    args = parser.parse_args()
+    if len(sys.argv) > 1:
+        args.func(args)
+    else:
+        sys.exit('\nERROR Arguments required\n\tPlease run: cgpSeqInputVal --help\n')
diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py
index 263974f..809e0a1 100644
--- a/cgp_seq_input_val/manifest.py
+++ b/cgp_seq_input_val/manifest.py
@@ -20,6 +20,18 @@ VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either \
 is present, check body.validate."
 
+def wrapped_validate(args):
+    """
+    Top level entry point for validating a manifest
+    """
+    try:
+        manifest = Manifest(args.input)
+        manifest.validate()
+        # output new manifest in tsv and json.
+        (tsv_file, json_file) = manifest.write(args.output)
+        print("Created files:\n\t%s\n\t%s" % (tsv_file, json_file))
+    except ValidationError as ve:
+        sys.exit("ERROR: " + str(ve))
 
 def uuid4_chk(uuid_str):
     """Tests validity of uuid"""
diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py
index 9d0b62a..59ec1b2 100644
--- a/cgp_seq_input_val/seq_validator.py
+++ b/cgp_seq_input_val/seq_validator.py
@@ -16,6 +16,25 @@
 prog_records = 100000
 
+def validate_seq_files(args):
+    """
+    Top level entry point for validating sequence files.
+ """ + try: + file_2 = None + if len(args.input) == 2: + file_2 = args.input[1] + validator = SeqValidator(args.input[0], file_2) + validator.validate() + validator.report(args.report) + except SeqValidationError as ve: # runtime so no functions for message and errno + print("ERROR: " + str(ve), file=sys.stderr) + exit(1) + # have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError + except (OSError, IOError) as err: + print("ERROR: %s - %s" % (err.strerror, err.filename), file=sys.stderr) + exit(err.errno) + class SeqValidator(object): """ diff --git a/run_tests.sh b/run_tests.sh index 6f15bec..e9a90b5 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash set -e -env/bin/nosetests --with-coverage --cover-erase --cover-html --cover-min-percentage=50 --cover-package=cgp_seq_input_val +pytest --cov-report term --cov-report html --cov=cgp_seq_input_val --cov-fail-under=50 set +e # these should not die: @@ -8,16 +8,16 @@ set +e echo -e "\n##########################" echo "# Running pylint (style) #" echo "##########################" -env/bin/pylint --output-format=colorized bin/*.py cgp_seq_input_val +pylint --output-format=colorized cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (cyclomatic complexity) #" echo "#########################################" -env/bin/radon cc -nc bin cgp_seq_input_val +radon cc -nc cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (maintainability index) #" echo "#########################################" -env/bin/radon mi -s -n B bin cgp_seq_input_val +radon mi -s -n B cgp_seq_input_val exit 0 # don't die based on assements of code quality diff --git a/setup.py b/setup.py index fc2874b..9408ecc 100755 --- a/setup.py +++ b/setup.py @@ -11,12 +11,13 @@ 'author_email': 'cgphelp@sanger.ac.uk', 'version': '1.1.0', 'python_requires': '>= 3.3', - 'setup_requires': ['nose>=1.0'], + 'setup_requires': ['pytest'], 'install_requires': ['progressbar2', 'xlrd'], 'packages': ['cgp_seq_input_val'], 'package_data': {'cgp_seq_input_val': ['config/*.json']}, - 'scripts': ['bin/normalise_manifest.py', 'bin/validate_manifest.py', - 'bin/validate_seq_file.py'] + 'entry_points': { + 'console_scripts': ['cgpSeqInputVal=cgp_seq_input_val.command_line:main'], + } } setup(**config) diff --git a/tests/cgp_seq_input_val_tests_cliutil.py b/tests/cgp_seq_input_val_tests_cliutil.py deleted file mode 100644 index d317eb4..0000000 --- a/tests/cgp_seq_input_val_tests_cliutil.py +++ /dev/null @@ -1,22 +0,0 @@ -from nose.tools import * -import os, sys, tempfile -import glob -#from argparse import Namespace - -from cgp_seq_input_val.cliutil import extn_check -from cgp_seq_input_val import constants - -import argparse - -test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cliutil') - -def setup(): - pass - -def teardown(): - pass - -def test_extn_check_good(): - parser = argparse.ArgumentParser() - for f in glob.glob(os.path.join(test_dir, 'good.*')): - extn_check(parser, constants.MANIFEST_EXTNS, f, readable=True) diff --git a/tests/cgp_seq_input_val_tests_fastq_read.py b/tests/cgp_seq_input_val_tests_fastq_read.py deleted file mode 100644 index 2ce3802..0000000 --- a/tests/cgp_seq_input_val_tests_fastq_read.py +++ /dev/null @@ -1,47 +0,0 @@ -from nose.tools import * -import os, sys, tempfile - -from cgp_seq_input_val.fastq_read import FastqRead -from cgp_seq_input_val.error_classes import SeqValidationError - -test_dir = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') - -def setup(): - pass - -def teardown(): - pass - -@raises(SeqValidationError) -def test_fastq_header_no_at(): - fqi = os.path.join(test_dir, 'bad_header_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -@raises(SeqValidationError) -def test_fastq_header_at_only(): - fqi = os.path.join(test_dir, 'bad_header_2.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -@raises(SeqValidationError) -def test_fastq_seq_shorter_than_qual(): - fqi = os.path.join(test_dir, 'seq-shorter_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -@raises(SeqValidationError) -def test_fastq_qual_shorter_than_seq(): - fqi = os.path.join(test_dir, 'qual-shorter_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -def test_fastq_string_print(): - fqi = os.path.join(test_dir, 'good_read_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - t = str(fr) diff --git a/tests/cgp_seq_input_val_tests_manifest.py b/tests/cgp_seq_input_val_tests_manifest.py deleted file mode 100644 index 5473a49..0000000 --- a/tests/cgp_seq_input_val_tests_manifest.py +++ /dev/null @@ -1,228 +0,0 @@ -from nose.tools import * -import sys, os, tempfile, shutil, json -from cgp_seq_input_val.manifest import Manifest, Header, Body, ConfigError, ParsingError, ValidationError -from argparse import Namespace - -data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'data') -test_data = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') -configs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'configs') - -def setup_args(indir, intype, tmpd): - ''' - Simplifies tests by generating standardised in/out for specified filetypes - ''' - return Namespace(input=os.path.join(indir, 'SimplifiedManifest_v1.0.%s' % (intype)), - output=os.path.join(tmpd, '%s_to.tsv' % (intype)) ) - -def setup(): - print("SETUP!") - -def teardown(): - print("TEAR DOWN!") - -### Manifest tests - -@raises(ValueError) -def test_manifest_bad_filetype(): - infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.xls') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_missing_required(): - infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.tsv') - manifest = Manifest(infile) - manifest.validate() - -def test_manifest_write(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) - manifest.validate() - (tsv_file, json_file) = manifest.write(tmpd) # output new manifest in tsv and json. 
- -def test_manifest_uuid(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) - manifest.validate() - assert manifest.get_uuid() - -def test_manifest_existing_uuid(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'with_uuid.tsv')) - manifest.validate() - assert manifest.get_uuid() == '05218fd0-79e5-4214-92d5-e133cd16a798' - -@raises(ValidationError) -def test_manifest_existing_bad_uuid(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'with_bad_uuid.tsv')) - manifest.validate() - -@raises(ValidationError) -def test_manifest_uuid_novalidate(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) - assert manifest.get_uuid() - -### Config parsing tests - -@raises(ParsingError) -def test_manifest_get_config_bad_type(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'bad_type', 'IMPORT-1.0.json')) - -@raises(ParsingError) -def test_manifest_get_config_bad_version(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'bad_version', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_body(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_body', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_expected(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_expected', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_header(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_header', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_required(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_required', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_validate(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_validate', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_limit_no_limit_by(): - # need a good file to setup and then test get_config with a bad config file - infile = os.path.join(test_data, - 'file_set_good', - 'files_good.tsv') - header = Header(infile) - cfg = header.get_config(os.path.join(configs, 'limit_no_limit_by', 'IMPORT-1.0.json')) - body = Body(infile, cfg['body']) - body.validate(cfg['body']) - -@raises(ConfigError) -def test_manifest_json_limit_by_no_limit(): - # need a good file to setup and then test get_config with a bad config file - infile = os.path.join(test_data, - 'file_set_good', - 'files_good.tsv') - header = Header(infile) - cfg = 
header.get_config(os.path.join(configs, 'limit_by_no_limit', 'IMPORT-1.0.json')) - body = Body(infile, cfg['body']) - body.validate(cfg['body']) - -### Header tests - -@raises(ValidationError) -def test_manifest_extra_header(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'extraHeader.tsv')) - config = header.get_config() - header.validate(config['header']) - -@raises(ValidationError) -def test_manifest_missing_header(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'missingHeader.tsv')) - config = header.get_config() - header.validate(config['header']) - -@raises(ValidationError) -def test_manifest_invalid_header_val(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'invalidHeaderVal.tsv')) - config = header.get_config() - header.validate(config['header']) - -### Body tests - -@raises(ValidationError) -def test_manifest_invalid_body_val(): - infile = os.path.join(test_data, 'invalidBodyVal.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_absent_body_val(): - infile = os.path.join(test_data, 'absentBodyVal.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_period_body_val(): - infile = os.path.join(test_data, 'periodBodyVal.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_dup_files_same_row(): - infile = os.path.join(test_data, 'dupFilesSameRow.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_dup_files_diff_row(): - infile = os.path.join(test_data, 'dupFilesDiffRow.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_body_head_order(): - infile = os.path.join(test_data, 'bodyHeadOrder.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_extn_file1(): - infile = os.path.join(test_data, 'invalidExtnFile1.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_extn_file2(): - infile = os.path.join(test_data, 'invalidExtnFile2.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_paired_extn_mismatch(): - infile = os.path.join(test_data, 'pairedExtnMismatch.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_limit_exceeded(): - # need a good file to setup and then test get_config with a bad config file - infile = os.path.join(test_data, - 'file_set_good', - 'files_good.tsv') - header = Header(infile) - cfg = header.get_config(os.path.join(configs, 'limit_to_exceed', 'IMPORT-1.0.json')) - body = Body(infile, cfg['body']) - body.validate(cfg['body']) - -def test_manifest_file_set_good(): - infile = os.path.join(test_data, 'file_set_good', - 'files_good.tsv') - manifest = Manifest(infile) - manifest.validate(True) - as_json = json.dumps(manifest.for_json()) diff --git a/tests/cgp_seq_input_val_tests_seq_validator.py b/tests/cgp_seq_input_val_tests_seq_validator.py deleted file mode 100644 index 91ea5fe..0000000 --- a/tests/cgp_seq_input_val_tests_seq_validator.py +++ /dev/null @@ -1,83 +0,0 @@ -from nose.tools import * -import os, sys, tempfile - -from cgp_seq_input_val.seq_validator import SeqValidator -from 
cgp_seq_input_val.error_classes import SeqValidationError - -test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') - -def setup(): - pass - -def teardown(): - pass - -def test_seq_val_i_read_good(): - fqi = os.path.join(test_dir, 'good_read_i.fq') - sv = SeqValidator(fqi, None, progress_pairs=1) - sv.validate() - -def test_seq_val_p_read_good(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'good_read_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=1) - sv.validate() - -def test_seq_val_i_gz_read_good(): - fqi = os.path.join(test_dir, 'good_read_i.fq.gz') - sv = SeqValidator(fqi, None, progress_pairs=0) - sv.validate() - t = str(sv) - sv.report(sys.stdout) - -def test_seq_val_p_gz_read_good(): - fq1 = os.path.join(test_dir, 'good_read_1.fq.gz') - fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_bad_file(): - fqi = os.path.join(test_dir, 'good_read_i.BAD') - sv = SeqValidator(fqi, None, progress_pairs=0) - -@raises(SeqValidationError) -def test_seq_val_mismatch_ext(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - -@raises(SeqValidationError) -def test_seq_val_more_read2(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, '2_reads_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_more_read1(): - fq1 = os.path.join(test_dir, '2_reads_1.fq') - fq2 = os.path.join(test_dir, 'good_read_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_r1_in_2(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'r1_reads_in_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_r2_in_1(): - fq1 = os.path.join(test_dir, 'good_read_2.fq') - fq2 = os.path.join(test_dir, 'r2_reads_in_1.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_fq_name(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'diff_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() diff --git a/tests/data/cliutil/bad.extn b/tests/data/cliutil/bad.extn new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cgp_seq_input_val_cliutil.py b/tests/test_cgp_seq_input_val_cliutil.py new file mode 100644 index 0000000..38cf326 --- /dev/null +++ b/tests/test_cgp_seq_input_val_cliutil.py @@ -0,0 +1,32 @@ +import pytest +import os, sys, tempfile +import glob +#from argparse import Namespace + +from cgp_seq_input_val.cliutil import extn_check +from cgp_seq_input_val import constants + +import argparse + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cliutil') + +def setup(): + pass + +def teardown(): + pass + +def test_extn_check_good_extn(): + parser = argparse.ArgumentParser() + for f in glob.glob(os.path.join(test_dir, 'good.*')): + extn_check(parser, constants.MANIFEST_EXTNS, f, readable=True) + +def test_extn_check_f_not_f(): + with pytest.raises(SystemExit) as e_info: + parser = argparse.ArgumentParser() + extn_check(parser, constants.MANIFEST_EXTNS, '/I_wont_exist_cgp_seq_input_val', readable=True) + +def test_extn_check_bad_extn(): + with 
pytest.raises(SystemExit) as e_info: + parser = argparse.ArgumentParser() + extn_check(parser, constants.MANIFEST_EXTNS, os.path.join(test_dir, 'bad.extn'), readable=True) diff --git a/tests/test_cgp_seq_input_val_fastq_read.py b/tests/test_cgp_seq_input_val_fastq_read.py new file mode 100644 index 0000000..4625720 --- /dev/null +++ b/tests/test_cgp_seq_input_val_fastq_read.py @@ -0,0 +1,51 @@ +import pytest +import os, sys, tempfile + +from cgp_seq_input_val.fastq_read import FastqRead +from cgp_seq_input_val.error_classes import SeqValidationError + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') + +def setup(): + pass + +def teardown(): + pass + +def test_fastq_header_no_at(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'bad_header_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_header_at_only(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'bad_header_2.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_seq_shorter_than_qual(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'seq-shorter_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_qual_shorter_than_seq(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'qual-shorter_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_string_print(): + fqi = os.path.join(test_dir, 'good_read_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + t = str(fr) diff --git a/tests/cgp_seq_input_val_tests_file_meta.py b/tests/test_cgp_seq_input_val_file_meta.py similarity index 55% rename from tests/cgp_seq_input_val_tests_file_meta.py rename to tests/test_cgp_seq_input_val_file_meta.py index f9f9ac5..b44a55b 100644 --- a/tests/cgp_seq_input_val_tests_file_meta.py +++ b/tests/test_cgp_seq_input_val_file_meta.py @@ -1,4 +1,4 @@ -from nose.tools import * +import pytest from cgp_seq_input_val.file_meta import FileValidationError, FileMeta import os, sys, tempfile @@ -10,22 +10,22 @@ def teardown(): ### FileMeta tests -@raises(FileValidationError) def test_file_meta_file_absent(): - headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] - details = ["1", "Start", "Y", "Y", "1", "bello.bam"] - fm = FileMeta(headers, details, '/') - fm.test_files(1) + with pytest.raises(FileValidationError) as e_info: + headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] + details = ["1", "Start", "Y", "Y", "1", "bello.bam"] + fm = FileMeta(headers, details, '/') + fm.test_files(1) -@raises(FileValidationError) def test_file_meta_file_empty(): - headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] - details = ["1", "Start", "Y", "Y", "1", "bello.bam"] - with tempfile.TemporaryDirectory() as tmpd: - with open(os.path.join(tmpd, 'bello.bam'), 'w'): - pass - fm = FileMeta(headers, details, tmpd) - fm.test_files(1) + with pytest.raises(FileValidationError) as e_info: + headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] + details = ["1", "Start", "Y", "Y", "1", "bello.bam"] + with tempfile.TemporaryDirectory() as tmpd: + with open(os.path.join(tmpd, 'bello.bam'), 'w'): + pass + fm = 
FileMeta(headers, details, tmpd) + fm.test_files(1) def test_file_meta_get_path(): headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] diff --git a/tests/test_cgp_seq_input_val_manifest.py b/tests/test_cgp_seq_input_val_manifest.py new file mode 100644 index 0000000..6b624e0 --- /dev/null +++ b/tests/test_cgp_seq_input_val_manifest.py @@ -0,0 +1,228 @@ +import pytest +import sys, os, tempfile, shutil, json +from cgp_seq_input_val.manifest import Manifest, Header, Body, ConfigError, ParsingError, ValidationError +from argparse import Namespace + +data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'data') +test_data = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') +configs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'configs') + +def setup_args(indir, intype, tmpd): + ''' + Simplifies tests by generating standardised in/out for specified filetypes + ''' + return Namespace(input=os.path.join(indir, 'SimplifiedManifest_v1.0.%s' % (intype)), + output=os.path.join(tmpd, '%s_to.tsv' % (intype)) ) + +def setup(): + print("SETUP!") + +def teardown(): + print("TEAR DOWN!") + +### Manifest tests + +def test_manifest_bad_filetype(): + with pytest.raises(ValueError) as e_info: + infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.xls') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_missing_required(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_write(): + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) + manifest.validate() + (tsv_file, json_file) = manifest.write(tmpd) # output new manifest in tsv and json. 
+ +def test_manifest_uuid(): + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) + manifest.validate() + assert manifest.get_uuid() + +def test_manifest_existing_uuid(): + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'with_uuid.tsv')) + manifest.validate() + assert manifest.get_uuid() == '05218fd0-79e5-4214-92d5-e133cd16a798' + +def test_manifest_existing_bad_uuid(): + with pytest.raises(ValidationError) as e_info: + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'with_bad_uuid.tsv')) + manifest.validate() + +def test_manifest_uuid_novalidate(): + with pytest.raises(ValidationError) as e_info: + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) + assert manifest.get_uuid() + +### Config parsing tests + +def test_manifest_get_config_bad_type(): + with pytest.raises(ParsingError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'bad_type', 'IMPORT-1.0.json')) + +def test_manifest_get_config_bad_version(): + with pytest.raises(ParsingError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'bad_version', 'IMPORT-1.0.json')) + +def test_manifest_json_no_body(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_body', 'IMPORT-1.0.json')) + +def test_manifest_json_no_expected(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_expected', 'IMPORT-1.0.json')) + +def test_manifest_json_no_header(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_header', 'IMPORT-1.0.json')) + +def test_manifest_json_no_required(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_required', 'IMPORT-1.0.json')) + +def test_manifest_json_no_validate(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_validate', 'IMPORT-1.0.json')) + +def test_manifest_json_limit_no_limit_by(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + infile = os.path.join(test_data, + 'file_set_good', + 'files_good.tsv') + header = Header(infile) + cfg = header.get_config(os.path.join(configs, 'limit_no_limit_by', 'IMPORT-1.0.json')) + body = Body(infile, cfg['body']) + body.validate(cfg['body']) + +def test_manifest_json_limit_by_no_limit(): + 
with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + infile = os.path.join(test_data, + 'file_set_good', + 'files_good.tsv') + header = Header(infile) + cfg = header.get_config(os.path.join(configs, 'limit_by_no_limit', 'IMPORT-1.0.json')) + body = Body(infile, cfg['body']) + body.validate(cfg['body']) + +### Header tests + +def test_manifest_extra_header(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'extraHeader.tsv')) + config = header.get_config() + header.validate(config['header']) + +def test_manifest_missing_header(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'missingHeader.tsv')) + config = header.get_config() + header.validate(config['header']) + +def test_manifest_invalid_header_val(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'invalidHeaderVal.tsv')) + config = header.get_config() + header.validate(config['header']) + +### Body tests + +def test_manifest_invalid_body_val(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'invalidBodyVal.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_absent_body_val(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'absentBodyVal.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_period_body_val(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'periodBodyVal.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_dup_files_same_row(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'dupFilesSameRow.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_dup_files_diff_row(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'dupFilesDiffRow.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_body_head_order(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'bodyHeadOrder.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_extn_file1(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'invalidExtnFile1.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_extn_file2(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'invalidExtnFile2.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_paired_extn_mismatch(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'pairedExtnMismatch.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_limit_exceeded(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + infile = os.path.join(test_data, + 'file_set_good', + 'files_good.tsv') + header = Header(infile) + cfg = header.get_config(os.path.join(configs, 'limit_to_exceed', 'IMPORT-1.0.json')) + body = Body(infile, cfg['body']) + body.validate(cfg['body']) + +def test_manifest_file_set_good(): + 
infile = os.path.join(test_data, 'file_set_good', + 'files_good.tsv') + manifest = Manifest(infile) + manifest.validate(True) + as_json = json.dumps(manifest.for_json()) diff --git a/tests/cgp_seq_input_val_tests_normalise.py b/tests/test_cgp_seq_input_val_normalise.py similarity index 96% rename from tests/cgp_seq_input_val_tests_normalise.py rename to tests/test_cgp_seq_input_val_normalise.py index b0e8ba0..33a49e4 100644 --- a/tests/cgp_seq_input_val_tests_normalise.py +++ b/tests/test_cgp_seq_input_val_normalise.py @@ -1,4 +1,3 @@ -from nose.tools import * import sys, os, tempfile, shutil from cgp_seq_input_val.manifest import normalise from argparse import Namespace @@ -13,10 +12,10 @@ def setup_args(indir, intype, tmpd): output=os.path.join(tmpd, '%s_to.tsv' % (intype)) ) def setup(): - print("SETUP!") + pass def teardown(): - print("TEAR DOWN!") + pass def test_normalise_xls(): with tempfile.TemporaryDirectory() as tmpd: diff --git a/tests/test_cgp_seq_input_val_seq_validator.py b/tests/test_cgp_seq_input_val_seq_validator.py new file mode 100644 index 0000000..9f18b56 --- /dev/null +++ b/tests/test_cgp_seq_input_val_seq_validator.py @@ -0,0 +1,83 @@ +import pytest +import os, sys, tempfile + +from cgp_seq_input_val.seq_validator import SeqValidator +from cgp_seq_input_val.error_classes import SeqValidationError + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') + +def setup(): + pass + +def teardown(): + pass + +def test_seq_val_i_read_good(): + fqi = os.path.join(test_dir, 'good_read_i.fq') + sv = SeqValidator(fqi, None, progress_pairs=1) + sv.validate() + +def test_seq_val_p_read_good(): + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'good_read_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=1) + sv.validate() + +def test_seq_val_i_gz_read_good(): + fqi = os.path.join(test_dir, 'good_read_i.fq.gz') + sv = SeqValidator(fqi, None, progress_pairs=0) + sv.validate() + t = str(sv) + sv.report(sys.stdout) + +def test_seq_val_p_gz_read_good(): + fq1 = os.path.join(test_dir, 'good_read_1.fq.gz') + fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_bad_file(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'good_read_i.BAD') + sv = SeqValidator(fqi, None, progress_pairs=0) + +def test_seq_val_mismatch_ext(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + +def test_seq_val_more_read2(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, '2_reads_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_more_read1(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, '2_reads_1.fq') + fq2 = os.path.join(test_dir, 'good_read_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_r1_in_2(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'r1_reads_in_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_r2_in_1(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_2.fq') + fq2 = os.path.join(test_dir, 
'r2_reads_in_1.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_fq_name(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'diff_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() From b39eb1106605221dc56ade888c4eb8a7d76bd0be Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 11:24:29 +0100 Subject: [PATCH 24/37] remove use of exit --- cgp_seq_input_val/seq_validator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 59ec1b2..f7d8834 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -28,12 +28,10 @@ def validate_seq_files(args): validator.validate() validator.report(args.report) except SeqValidationError as ve: # runtime so no functions for message and errno - print("ERROR: " + str(ve), file=sys.stderr) - exit(1) + sys.exit("ERROR: " + str(ve)) # have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError except (OSError, IOError) as err: - print("ERROR: %s - %s" % (err.strerror, err.filename), file=sys.stderr) - exit(err.errno) + sys.exit("ERROR (%d): %s - %s" % (err.errno, err.strerror, err.filename)) class SeqValidator(object): From b6d07c4658605f952dc1c4fc315eede3ccd18a6b Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 12:11:38 +0100 Subject: [PATCH 25/37] Switch from pylint to pep8 for better alignment with python style guide and code-climate --- README.md | 2 +- cgp_seq_input_val/command_line.py | 85 ++++-- cgp_seq_input_val/manifest.py | 46 ++-- cgp_seq_input_val/seq_validator.py | 10 +- pylintrc | 425 ----------------------------- run_tests.sh | 6 +- tox.ini | 4 +- 7 files changed, 104 insertions(+), 474 deletions(-) delete mode 100644 pylintrc diff --git a/README.md b/README.md index d3d1257..a305185 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ For testing/coverage (`./run_tests.sh`) source env/bin/activate # if not already in env pip install pytest pip install pytest-cov -pip install pylint +pip install pep8 pip install radon ``` diff --git a/cgp_seq_input_val/command_line.py b/cgp_seq_input_val/command_line.py index 7cd6d2d..38a3a07 100644 --- a/cgp_seq_input_val/command_line.py +++ b/cgp_seq_input_val/command_line.py @@ -12,6 +12,7 @@ from cgp_seq_input_val.seq_validator import validate_seq_files version = pkg_resources.require("cgp_seq_input_val")[0].version + def main(): """ Sets up the parser and handles triggereing of correct sub-command @@ -20,37 +21,75 @@ def main(): subparsers = parser.add_subparsers(help='sub-command help') # create the parser for the "man-norm" command - parser_a = subparsers.add_parser('man-norm', description='Convert manifest files to common denominator (tsv)', - epilog='Input can be [xls|xlsx|csv|tsv]. \ - "tsv" is just copied to maintain tool-chain') - parser_a.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) - parser_a.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in friendly formats', required=True, - type=lambda s: cliutil.extn_check(parser, constants.MANIFEST_EXTNS, s, readable=True)) - parser_a.add_argument('-o', '--output', dest='output', metavar='FILE', - help='Output file *.tsv [default: sub. 
extension]', required=False, + parser_a = subparsers.\ + add_parser('man-norm', + description='Convert manifest files to common denominator (tsv)', + epilog='Input can be [xls|xlsx|csv|tsv]. \ + "tsv" is just copied to maintain tool-chain') + parser_a.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + parser_a.add_argument('-i', '--input', + dest='input', + metavar='FILE', + help='Input manifest in friendly formats', + required=True, + type=lambda s: cliutil.extn_check(parser, + constants.MANIFEST_EXTNS, + s, + readable=True)) + parser_a.add_argument('-o', '--output', + dest='output', + metavar='FILE', + help='Output file *.tsv [default: sub. extension]', + required=False, type=lambda s: cliutil.extn_check(parser, ('tsv'), s)) parser_a.set_defaults(func=normalise) # create the parser for the "man-valid" command - parser_b = subparsers.add_parser('man-valid', description='Validate a tsv import manifest file') - parser_b.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) - parser_b.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in tsv formats', required=True, - type=lambda s: cliutil.extn_check(parser, ('tsv'), s, readable=True)) - parser_b.add_argument('-o', '--output', dest='output', metavar='DIR', - help='Output manifest to this area, two files (tsv, json)', required=True) - parser_b.add_argument('-c', '--checkfiles', dest='checkfiles', action='store_true', + parser_b = subparsers.add_parser('man-valid', + description='Validate a tsv import manifest file') + parser_b.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + parser_b.add_argument('-i', '--input', + dest='input', + metavar='FILE', + help='Input manifest in tsv formats', + required=True, + type=lambda s: cliutil.extn_check(parser, + ('tsv'), + s, + readable=True)) + parser_b.add_argument('-o', '--output', + dest='output', + metavar='DIR', + help='Output manifest to this area, two files (tsv, json)', + required=True) + parser_b.add_argument('-c', '--checkfiles', + dest='checkfiles', + action='store_true', help='When present check file exist and are non-zero size') parser_b.set_defaults(func=wrapped_validate) # create the parser for the "seq-valid" command - parser_c = subparsers.add_parser('seq-valid', description='Validates up to 2 sequencing data files.') - parser_c.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) - parser_c.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), default='-', - help='Output json report', required=False) - parser_c.add_argument('-i', '--input', dest='input', metavar='FILE', nargs='+', - help='Input manifest in tsv formats', required=True) + parser_c = subparsers.add_parser('seq-valid', + description='Validates up to 2 sequencing data files.') + parser_c.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + parser_c.add_argument('-r', '--report', + dest='report', + type=argparse.FileType('w'), + default='-', + help='Output json report', + required=False) + parser_c.add_argument('-i', '--input', + dest='input', + metavar='FILE', + nargs='+', + help='Input manifest in tsv formats', + required=True) parser_c.set_defaults(func=validate_seq_files) args = parser.parse_args() diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py index 809e0a1..0b17e51 100644 --- a/cgp_seq_input_val/manifest.py +++ b/cgp_seq_input_val/manifest.py @@ -12,7 +12,9 @@ from pkg_resources 
import resource_string, resource_filename
 
 from cgp_seq_input_val import constants
-from cgp_seq_input_val.error_classes import ConfigError, ParsingError, ValidationError
+from cgp_seq_input_val.error_classes import (ConfigError,
+                                             ParsingError,
+                                             ValidationError)
 from cgp_seq_input_val.file_meta import FileMeta
 
 VAL_LIM_ERROR = "Only %d sample(s) with a value of '%s' is allowed in column \
@@ -20,6 +22,7 @@ VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either \
 is present, check body.validate."
 
+
 def wrapped_validate(args):
     """
     Top level entry point for validating a manifest
@@ -33,6 +36,7 @@
     except ValidationError as ve:
         sys.exit("ERROR: " + str(ve))
 
+
 def uuid4_chk(uuid_str):
     """Tests validity of uuid"""
     try:
@@ -440,22 +444,30 @@ def field_values_valid(self, validate):
                % (field, fd.attributes[field], cnt))
             # Construct value occurence limiting counts
             for val_limit in chk:
-                if 'limit' in val_limit or 'limit_by' in val_limit:
-                    if 'limit' not in val_limit or 'limit_by' not in val_limit:
-                        raise ConfigError(VAL_LIM_CONFIG_ERROR+field)
-
-                    if fd.attributes[field] != val_limit['value']:
-                        continue
-
-                    lim_chk_lookup = field + '_' + val_limit['value']
-                    limit_by_value = fd.attributes[val_limit['limit_by']]
-                    if lim_chk_lookup not in limit_chks:
-                        limit_chks[lim_chk_lookup] = {}
-                    if limit_by_value not in limit_chks[lim_chk_lookup]:
-                        limit_chks[lim_chk_lookup][limit_by_value] = {}
-                    if fd.attributes['Sample'] not in limit_chks[lim_chk_lookup][limit_by_value]:
-                        limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] = 0
-                    limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1
+                if 'limit' not in val_limit and 'limit_by' not in val_limit:
+                    # skip if neither is present
+                    continue
+
+                if 'limit' not in val_limit or 'limit_by' not in val_limit:
+                    # must be found in both
+                    raise ConfigError(VAL_LIM_CONFIG_ERROR+field)
+
+                if fd.attributes[field] != val_limit['value']:
+                    continue
+
+                lim_chk_lookup = field + '_' + val_limit['value']
+                limit_by_value = fd.attributes[val_limit['limit_by']]
+
+                # handle keys we've not seen yet
+                if lim_chk_lookup not in limit_chks:
+                    limit_chks[lim_chk_lookup] = {}
+                if limit_by_value not in limit_chks[lim_chk_lookup]:
+                    limit_chks[lim_chk_lookup][limit_by_value] = {}
+
+                if fd.attributes['Sample'] not in limit_chks[lim_chk_lookup][limit_by_value]:
+                    limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] = 0
+                limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1
+
         evaulate_value_limits(field, chk, limit_chks)
 
     def fields_have_values(self, rules):
diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py
index f7d8834..04156f1 100644
--- a/cgp_seq_input_val/seq_validator.py
+++ b/cgp_seq_input_val/seq_validator.py
@@ -16,6 +16,7 @@
 prog_records = 100000
 
+
 def validate_seq_files(args):
     """
     Top level entry point for validating sequence files.
@@ -225,17 +226,20 @@ def check_pair(self, read_1, read_2): self.q_min = q_min if read_1.name != read_2.name: - raise SeqValidationError("Fastq record name at line %d should be a match to paired file line %s:\ + raise SeqValidationError("Fastq record name at line %d should be a \ + match to paired file line %s:\ \n\t%s (%s)\n\t%s (%s)" % (read_1.file_pos[0], read_2.file_pos[0], read_1.name, self.file_a, read_2.name, self.file_b)) if read_1.end != '1': - raise SeqValidationError("Fastq record at line %d of %s should be for first in pair, got '%s'" + raise SeqValidationError("Fastq record at line %d of %s should be \ + for first in pair, got '%s'" % (read_1.file_pos[0], self.file_a, read_1.end)) if read_2.end != '2': - raise SeqValidationError("Fastq record at line %d of %s should be for second in pair, got '%s'" + raise SeqValidationError("Fastq record at line %d of %s should be \ + for second in pair, got '%s'" % (read_2.file_pos[0], self.file_b, read_2.end)) def setup_progress(self): diff --git a/pylintrc b/pylintrc deleted file mode 100644 index 23d1be8..0000000 --- a/pylintrc +++ /dev/null @@ -1,425 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist= - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. -jobs=1 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". 
If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,long-suffix,old-ne-operator,old-octal-literal,import-star-module-level,raw-checker-failed,bad-inline-option,locally-disabled,locally-enabled,file-ignored,suppressed-message,useless-suppression,deprecated-pragma,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,eq-without-hash,div-method,idiv-method,rdiv-method,exception-message-attribute,invalid-str-codec,sys-max-int,bad-python3-import,deprecated-string-function,deprecated-str-translate-call,invalid-name,too-few-public-methods,blacklisted-name - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable= - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio).You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages -reports=no - -# Activate the evaluation score. 
-score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[BASIC] - -# Naming hint for argument names -argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct argument names -argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Naming hint for attribute names -attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct attribute names -attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Naming hint for class attribute names -class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Naming hint for class names -class-name-hint=[A-Z_][a-zA-Z0-9]+$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - -# Naming hint for constant names -const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming hint for function names -function-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct function names -function-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_ - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# Naming hint for inline iteration names -inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Naming hint for method names -method-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct method names -method-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Naming hint for module names -module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -property-classes=abc.abstractproperty - -# Naming hint for variable names -variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct variable names -variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. 
-max-line-length=120 - -# Maximum number of lines in a module -max-module-lines=1000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. 
-missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - -# Maximum number of branch for function / method body -max-branches=12 - -# Maximum number of locals for function / method body -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body -max-returns=6 - -# Maximum number of statements in function / method body -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of every (i.e. 
internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception diff --git a/run_tests.sh b/run_tests.sh index e9a90b5..1b8caca 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -6,9 +6,9 @@ set +e # these should not die: echo -e "\n##########################" -echo "# Running pylint (style) #" +echo "# Running pep8 (style) #" echo "##########################" -pylint --output-format=colorized cgp_seq_input_val +pep8 --format=pylint cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (cyclomatic complexity) #" @@ -18,6 +18,6 @@ radon cc -nc cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (maintainability index) #" echo "#########################################" -radon mi -s -n B cgp_seq_input_val +radon mi -s cgp_seq_input_val exit 0 # don't die based on assements of code quality diff --git a/tox.ini b/tox.ini index d8b4df9..48185ec 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [pep8] -max-line-length = 160 +max-line-length = 99 [pycodestyle] -max-line-length = 160 +max-line-length = 99 From a210c93b767d04a2c317a09624f1b7e5aa2ba76e Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 13:59:06 +0100 Subject: [PATCH 26/37] Fix install docs --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a305185..f31f3ca 100644 --- a/README.md +++ b/README.md @@ -66,16 +66,15 @@ though. ## INSTALL -Installation is via `easy_install`. Simply execute with the path to the compiled -'egg': +Installation is via `pip`. 
Simply execute with the path to the packaged distribution: ```bash -easy_install bundles/cgp_seq_input_val-0.1.0-py3.6.egg +pip install --find-links=~/wheels cgp_seq_input_val ``` ### Package Dependancies -`easy_install` will install the relevant dependancies, listed here for convenience: +`pip` will install the relevant dependancies, listed here for convenience: * [progressbar2](http://progressbar-2.readthedocs.io/en/latest/) * [xlrd](https://github.com/python-excel/xlrd) @@ -99,7 +98,7 @@ You can run the same checks manually without a commit by executing the following in the base of the clone: ```bash -./run_tests.py +./run_tests.psh ``` ### Development Dependencies From 9b85d81cbd4b93e5de21ec2d136b2e2edaf3b70e Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 14:08:04 +0100 Subject: [PATCH 27/37] Use a common parser for version --- cgp_seq_input_val/command_line.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cgp_seq_input_val/command_line.py b/cgp_seq_input_val/command_line.py index 38a3a07..a6dcbbb 100644 --- a/cgp_seq_input_val/command_line.py +++ b/cgp_seq_input_val/command_line.py @@ -17,18 +17,22 @@ def main(): """ Sets up the parser and handles triggereing of correct sub-command """ - parser = argparse.ArgumentParser(prog='cgpSeqInputVal') + common_parser = argparse.ArgumentParser('parent', add_help=False) + common_parser.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + + parser = argparse.ArgumentParser(prog='cgpSeqInputVal', parents=[common_parser]) + subparsers = parser.add_subparsers(help='sub-command help') # create the parser for the "man-norm" command parser_a = subparsers.\ add_parser('man-norm', + parents=[common_parser], description='Convert manifest files to common denominator (tsv)', epilog='Input can be [xls|xlsx|csv|tsv]. \ "tsv" is just copied to maintain tool-chain') - parser_a.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + version) parser_a.add_argument('-i', '--input', dest='input', metavar='FILE', @@ -48,10 +52,8 @@ def main(): # create the parser for the "man-valid" command parser_b = subparsers.add_parser('man-valid', + parents=[common_parser], description='Validate a tsv import manifest file') - parser_b.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + version) parser_b.add_argument('-i', '--input', dest='input', metavar='FILE', @@ -74,10 +76,8 @@ def main(): # create the parser for the "seq-valid" command parser_c = subparsers.add_parser('seq-valid', + parents=[common_parser], description='Validates up to 2 sequencing data files.') - parser_c.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + version) parser_c.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), From 53fabb416dba10d8c8de8ef713eaf3ce76073cee Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:37:38 +0100 Subject: [PATCH 28/37] Add licence --- LICENSE | 661 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 661 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..dbbe355 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. 
From ccd881f1abd4fd75c1f6a4aae6ebe51cb9232d7e Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:50:00 +0100 Subject: [PATCH 29/37] Change test suite used in travis --- .travis.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1989da4..7b1e4c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,22 +9,21 @@ env: language: python python: + - "3.3" - "3.6" install: - - pip install nose coverage + - pip install pytest pytest-cov - pip install progressbar2 - pip install xlrd - before_script: - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter - chmod +x ./cc-test-reporter - ./cc-test-reporter before-build script: - - nosetests --with-coverage --cover-package=cgp_seq_input_val - - coverage xml + - pytest --cov-branch --cov-report term --cov=cgp_seq_input_val after_script: - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT From e8bc0b4f2b8b618f6ee5611a167efc8b560f6099 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:50:39 +0100 Subject: [PATCH 30/37] Add branch coverage and change linter --- run_tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 1b8caca..5a829c3 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,14 +1,14 @@ #!/usr/bin/env bash set -e -pytest --cov-report term --cov-report html --cov=cgp_seq_input_val --cov-fail-under=50 +pytest --cov-branch --cov-report term --cov-report html --cov=cgp_seq_input_val --cov-fail-under=50 set +e # these should not die: -echo -e "\n##########################" -echo "# Running pep8 (style) #" -echo "##########################" -pep8 --format=pylint cgp_seq_input_val +echo -e "\n#################################" +echo "# Running pycodestyle (style) #" +echo "#################################" +pycodestyle cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (cyclomatic complexity) #" From 8247a73c137feb2bd814a8d8069ec6f80001832f Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:50:56 +0100 Subject: [PATCH 31/37] Docs and deps --- README.md | 22 ++++++++++++++++++++++ setup.py | 7 ++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f31f3ca..78f551f 100644 --- a/README.md +++ b/README.md @@ -141,3 +141,25 @@ $ scp cgp_seq_input_val-1.1.0-py3-none-any.whl user@host:~/wheels # on host $ pip install --find-links=~/wheels cgp_seq_input_val ``` + + +LICENCE +======== +Copyright (c) 2017 Genome Research Ltd. + +Author: CancerIT + +This file is part of cgp_seq_input_val. + +cgp_seq_input_val is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
diff --git a/setup.py b/setup.py index 9408ecc..d6fd874 100755 --- a/setup.py +++ b/setup.py @@ -6,8 +6,8 @@ 'name': 'cgp_seq_input_val', 'description': 'Code to validate manifests and raw seq data', 'author': 'Keiran M Raine', - 'url': 'https://gitlab.internal.sanger.ac.uk/CancerIT/cgp_seq_input_val', - 'download_url': 'Where to download it.', + 'url': 'https://github.com/cancerit/cgp_seq_input_val', + 'download_url': '', 'author_email': 'cgphelp@sanger.ac.uk', 'version': '1.1.0', 'python_requires': '>= 3.3', @@ -17,7 +17,8 @@ 'package_data': {'cgp_seq_input_val': ['config/*.json']}, 'entry_points': { 'console_scripts': ['cgpSeqInputVal=cgp_seq_input_val.command_line:main'], - } + }, + 'metadata': {'license_file': 'LICENSE'}, } setup(**config) From 1e13441fe4f046e730c5fe7a8b17e82bb9bf0950 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:54:20 +0100 Subject: [PATCH 32/37] py3.3 needs additional package to use pytest --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 7b1e4c9..6f98e74 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,11 @@ env: global: - CC_TEST_REPORTER_ID=24af7ff1e88f3b70c8b9a5280ce9604d561dacd8eaa7b1d895128ca2bd724beb +addons: + apt: + packages: + - python-logilab-common + language: python python: From 54b0413ab2b7f2b609d459db15085e55daf8c5dd Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:54:46 +0100 Subject: [PATCH 33/37] add comment to package --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6f98e74..9fca16d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ env: addons: apt: packages: - - python-logilab-common + - python-logilab-common # only for py3.3 language: python From 75d54864534662339886e16bdc6e775580dd5a52 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 21:28:46 +0100 Subject: [PATCH 34/37] use 3.4 as minimum --- .travis.yml | 7 +------ setup.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9fca16d..3734842 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,15 +6,10 @@ env: global: - CC_TEST_REPORTER_ID=24af7ff1e88f3b70c8b9a5280ce9604d561dacd8eaa7b1d895128ca2bd724beb -addons: - apt: - packages: - - python-logilab-common # only for py3.3 - language: python python: - - "3.3" + - "3.4" - "3.6" install: diff --git a/setup.py b/setup.py index d6fd874..3f8cf95 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'download_url': '', 'author_email': 'cgphelp@sanger.ac.uk', 'version': '1.1.0', - 'python_requires': '>= 3.3', + 'python_requires': '>= 3.4', 'setup_requires': ['pytest'], 'install_requires': ['progressbar2', 'xlrd'], 'packages': ['cgp_seq_input_val'], From 109ef3a00b4a1ec16f8c928f4ebb2e70d2476090 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Thu, 19 Oct 2017 14:35:58 +0100 Subject: [PATCH 35/37] Add markdown linter and cleanup --- README.md | 10 +++++++--- run_tests.sh | 5 +++++ setup.py | 1 - 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 78f551f..fabac3a 100644 --- a/README.md +++ b/README.md @@ -123,9 +123,14 @@ pip install pytest pip install pytest-cov pip install pep8 pip install radon +gem install --user-install mdl ``` -__Also see__ [Package Dependancies](#package-dependancies) +Test that `mdl` is available, if not add the following to your path variable: + +``` +export PATH=$HOME/.gem/ruby/X.X.X/bin:$PATH +``` ### Cutting a release @@ -142,9 +147,8 @@ $ scp 
cgp_seq_input_val-1.1.0-py3-none-any.whl user@host:~/wheels $ pip install --find-links=~/wheels cgp_seq_input_val ``` +## LICENCE -LICENCE -======== Copyright (c) 2017 Genome Research Ltd. Author: CancerIT diff --git a/run_tests.sh b/run_tests.sh index 5a829c3..1f4c918 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -20,4 +20,9 @@ echo "# Running radon (maintainability index) #" echo "#########################################" radon mi -s cgp_seq_input_val +echo -e "\n##############################" +echo "# Running mdl (markdownlint) #" +echo "##############################" +mdl . + exit 0 # don't die based on assements of code quality diff --git a/setup.py b/setup.py index 3f8cf95..b58d10c 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,6 @@ 'entry_points': { 'console_scripts': ['cgpSeqInputVal=cgp_seq_input_val.command_line:main'], }, - 'metadata': {'license_file': 'LICENSE'}, } setup(**config) From 0ec9b67f68614be98a0021730f60967d78ceb561 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Thu, 19 Oct 2017 14:37:27 +0100 Subject: [PATCH 36/37] Fixes typo spotted during pre merge checking --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fabac3a..0781734 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ You can run the same checks manually without a commit by executing the following in the base of the clone: ```bash -./run_tests.psh +./run_tests.sh ``` ### Development Dependencies From 281a1678a6183cf53a93c65ed68b58349037f249 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 22 Nov 2017 14:34:16 +0000 Subject: [PATCH 37/37] Version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b58d10c..7746631 100755 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ 'url': 'https://github.com/cancerit/cgp_seq_input_val', 'download_url': '', 'author_email': 'cgphelp@sanger.ac.uk', - 'version': '1.1.0', + 'version': '1.2.0', 'python_requires': '>= 3.4', 'setup_requires': ['pytest'], 'install_requires': ['progressbar2', 'xlrd'],