From dbd51a5daf42e128a6017634ad33aff1c3e08ff1 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 10:49:56 +0100
Subject: [PATCH 01/37] Add ci

---
 .travis.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..7f5bf24
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+notifications:
+  slack: wtsi-cgpit:ptUMR1tkNyZJYd9TpGoss8WR
+  email: false
+
+language: python
+python:
+  - "3.6"
+
+install:
+  - pip install nose
+  - pip install progressbar2
+  - pip install xlrd
+
+script:
+  - ./run_tests.sh

From 990695e28d286759465720d2383cbbcc19783ea2 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 10:53:15 +0100
Subject: [PATCH 02/37] vanilla nose

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 7f5bf24..cf25633 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,4 +12,4 @@ install:
   - pip install xlrd
 
 script:
-  - ./run_tests.sh
+  - nosetests

From faac016086fb75aeebd3b03936bc6a6ccf6880eb Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:00:04 +0100
Subject: [PATCH 03/37] Add coverage, aiming for codeclimate reports

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index cf25633..dac145b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,9 +7,9 @@ python:
   - "3.6"
 
 install:
-  - pip install nose
+  - pip install nose coverage
   - pip install progressbar2
   - pip install xlrd
 
 script:
-  - nosetests
+  - nosetests --with-coverage --cover-erase --cover-package=cgp_seq_input_val

From 77400701d71d6d208717194647caa6ddb0fb34c6 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:24:55 +0100
Subject: [PATCH 04/37] Add config for codeclimate

---
 .codeclimate.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .codeclimate.yml

diff --git a/.codeclimate.yml b/.codeclimate.yml
new file mode 100644
index 0000000..fed5c9a
--- /dev/null
+++ b/.codeclimate.yml
@@ -0,0 +1,24 @@
+engines:
+  duplication:
+    enabled: true
+    config:
+      languages:
+      - python
+      #mass_threshold: 30
+  fixme:
+    enabled: true
+  markdownlint:
+    enabled: true
+  pep8:
+    enabled: true
+  radon:
+    enabled: true
+
+ratings:
+  paths:
+  - "**.py"
+  - "**.md"
+
+exclude_paths:
+  - "git-hooks/"
+  - "data/"

From 03cd3d43814ab6a7c7e3b62b880cd4424993d85e Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:25:53 +0100
Subject: [PATCH 05/37] correct yml lint

---
 .codeclimate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index fed5c9a..ab51933 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -1,8 +1,8 @@
 engines:
   duplication:
-    enabled: true
+    enabled: true
     config:
-      languages:
+      languages:
       - python
       #mass_threshold: 30
   fixme:
     enabled: true

From 5a7851e9e58158320f9635b9cfb10d561087f48b Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:32:26 +0100
Subject: [PATCH 06/37] Prevent tests from being evaluated

---
 .codeclimate.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index ab51933..91aab99 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -22,3 +22,4 @@ ratings:
 exclude_paths:
   - "git-hooks/"
   - "data/"
+  - "tests/"

From ac5b208c70995aa6e7568d792b4886982dff701c Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 11:40:56 +0100
Subject: [PATCH 07/37] Fixes markdown lint warnings

---
 README.md | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 81fdc3f..10b3375 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,8 @@ And a `json` version of the file ready for use by downstream systems.
 
 ### validate_seq_file.py
 
-Takes an interleaved or a pair of paired-fastq files and produces a simple report of:
+Takes an interleaved or a pair of paired-fastq files and produces a simple report
+of:
 
 ```
 {
@@ -53,50 +54,55 @@ Various exceptions can occur for malformed files.
 
 The primary purpose is to confirm Sanger/Illumina 1.8+ quality scores.
 
-#### Why no BAM/CRAM input?
+#### FASTQ not BAM/CRAM
 
 The flow of the service data will require splitting of any multi-lane BAM/CRAM files
 down to the individual lanes, which we would do to interleaved fastq. There is no
 current need to parse BAM/CRAM files to check quality encoding directly as the spec
-technically disallows it. It is possible for BAM files to be incorrectly encoded though.
+technically disallows it. It is possible for BAM files to be incorrectly encoded
+though.
 
+## INSTALL
 
-# INSTALL
-
-Installation is via `easy_install`. Simply execute with the path to the compiled 'egg':
+Installation is via `easy_install`. Simply execute with the path to the compiled
+'egg':
 
 ```bash
 easy_install bundles/cgp_seq_input_val-0.1.0-py3.6.egg
 ```
 
-## Package Dependancies
+### Package Dependancies
 
 `easy_install` will install the relevant dependancies, listed here for convenience:
 
 * [progressbar2](http://progressbar-2.readthedocs.io/en/latest/)
 * [xlrd](https://github.com/python-excel/xlrd)
 
+## Development environment
 
-# Development environment
-This project uses git pre-commit hooks. As these will execute on your system it is entirely up to you if you activate them.
+This project uses git pre-commit hooks. As these will execute on your system it
+is entirely up to you if you activate them.
 
-If you want tests, coverage reports and lint-ing to automatically execute before a commit you can activate them by running:
+If you want tests, coverage reports and lint-ing to automatically execute before
+a commit you can activate them by running:
 
 ```
 git config core.hooksPath git-hooks
 ```
 
-Only a test failure will block a commit, lint-ing is not enforced (but please consider following the guidance).
+Only a test failure will block a commit, lint-ing is not enforced (but please consider
+following the guidance).
-You can run the same checks manually without a commit by executing the following in the base of the clone:
+You can run the same checks manually without a commit by executing the following
+in the base of the clone:
 
 ```bash
 ./run_tests.py
 ```
 
-## Development Dependencies
+### Development Dependencies
 
-### Setup VirtualEnv:
+#### Setup VirtualEnv
 
 ```
 cd $PROJECTROOT
 hash virtualenv || pip3 install virtualenv
 virtualenv -p python3 env
 env/bin/pip install progressbar2
 env/bin/pip install xlrd
 ```
 
 For testing/coverage (`./run_tests.sh`)
 
 ```
 env/bin/pip install nose
 env/bin/pip install coverage
 env/bin/pip install pylint
 ```
 
-__Also see [Package Dependancies](#package-dependancies)__
+__Also see__ [Package Dependancies](#package-dependancies)
 
-## Cutting a release
+### Cutting a release
 
-__Make sure the version is incremented in ./setup.py__
+__Make sure the version is incremented__ in `./setup.py`
 
 The release is handled by setuptools:

From dafa87439f532e9f492ec157e8fc5bc4c3b25c0e Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 12:05:07 +0100
Subject: [PATCH 08/37] cleanup style and lint errors

---
 .codeclimate.yml | 1 +
 cgp_seq_input_val/cliutil.py | 1 +
 cgp_seq_input_val/error_classes.py | 4 +
 cgp_seq_input_val/fastq_read.py | 12 ++-
 cgp_seq_input_val/file_meta.py | 10 +-
 cgp_seq_input_val/manifest.py | 144 +++++++++++++++++++----------
 cgp_seq_input_val/seq_validator.py | 8 +-
 setup.py | 5 +-
 8 files changed, 121 insertions(+), 64 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 91aab99..70efdda 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -11,6 +11,7 @@ engines:
     enabled: true
   pep8:
     enabled: true
+    max-line-length: 120
   radon:
     enabled: true
diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py
index 6632a94..f82f1d7 100644
--- a/cgp_seq_input_val/cliutil.py
+++ b/cgp_seq_input_val/cliutil.py
@@ -1,6 +1,7 @@
 """General command line utility functions"""
 import os
 
+
 def extn_check(parser, choices, fname, readable=False):
     """Checks file extensions fit expected sets
 
diff --git a/cgp_seq_input_val/error_classes.py b/cgp_seq_input_val/error_classes.py
index 52664b5..6a9eab7 100644
--- a/cgp_seq_input_val/error_classes.py
+++ b/cgp_seq_input_val/error_classes.py
@@ -1,21 +1,25 @@
 """Package error classes"""
 
+
 class SeqValidationError(RuntimeError):
     """Exception for failures to validate data in the manifest."""
     pass
 
+
 class ConfigError(RuntimeError):
     """
     Exception for errors in the values of config/*.json files.
     """
     pass
 
+
 class ParsingError(RuntimeError):
     """
     Exception for errors in the naming of the config/*.json files.
     """
     pass
 
+
 class ValidationError(RuntimeError):
     """
     Exception for failures to validate data in the manifest.
diff --git a/cgp_seq_input_val/fastq_read.py b/cgp_seq_input_val/fastq_read.py
index a485763..620e62d 100644
--- a/cgp_seq_input_val/fastq_read.py
+++ b/cgp_seq_input_val/fastq_read.py
@@ -6,10 +6,11 @@
 from cgp_seq_input_val.error_classes import SeqValidationError
 
+
 class FastqRead(object):
     """
-    Models and validates a fastq read, calling print will produce a 4 line record
-    regardess of original format.
+    Models and validates a fastq read, calling print will produce a 4 line
+    record regardess of original format.
Inputs: fp: open file pointer to get next read from @@ -50,7 +51,7 @@ def __init__(self, fq_fh, line_no_in, curr_line): self.seq = seq self.qual = qual self.file_pos = (line_no_in, line_no) - self.last_line = curr_line # as we need to pass this back + self.last_line = curr_line # as we need to pass this back self.name = None self.end = None @@ -69,8 +70,9 @@ def validate(self, filename): """ match = re.match(r'@(\S+)/([12])', self.header) if match is None: - raise SeqValidationError("Sequence record header must begin with '@' \ - one non-whitespace character and '/[12]', line %d of %s" + raise SeqValidationError("Sequence record header must begin with \ + '@' one non-whitespace character and \ + '/[12]', line %d of %s" % (self.file_pos[0], filename)) groups = match.groups() self.name = groups[0] diff --git a/cgp_seq_input_val/file_meta.py b/cgp_seq_input_val/file_meta.py index bb72887..bf8a7b2 100644 --- a/cgp_seq_input_val/file_meta.py +++ b/cgp_seq_input_val/file_meta.py @@ -4,6 +4,7 @@ import os + class FileMeta(object): """ Oject to hold file metadata as a set of attributes with small set of @@ -26,7 +27,8 @@ def __init__(self, headers, details, rel_path): def get_path(self, f_type): """ Returns the path of a file after pre-pending with the 'rel_path' - All file entries in the manifest should be relative to the manifest itself. + All file entries in the manifest should be relative to the manifest + itself. """ item = self.attributes[f_type] if item == '.': @@ -45,9 +47,11 @@ def test_files(self, line): continue if not os.path.isfile(full_path): - raise FileValidationError("'%s' is not a file ('%s' - line %d)." % (item, f_type, line)) + raise FileValidationError("'%s' is not a file ('%s' - line %d)." + % (item, f_type, line)) if not os.path.getsize(full_path): - raise FileValidationError("'%s' is an empty file ('%s' - line %d)." % (item, f_type, line)) + raise FileValidationError("'%s' is an empty file ('%s' - line %d)." + % (item, f_type, line)) class FileValidationError(RuntimeError): """ diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py index dd1ee22..369ea3b 100644 --- a/cgp_seq_input_val/manifest.py +++ b/cgp_seq_input_val/manifest.py @@ -15,8 +15,11 @@ from cgp_seq_input_val.error_classes import ConfigError, ParsingError, ValidationError from cgp_seq_input_val.file_meta import FileMeta -VAL_LIM_ERROR = "Only %d sample(s) with a value of '%s' is allowed in column '%s' when rows grouped by '%s'" -VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either is present, check body.validate." +VAL_LIM_ERROR = "Only %d sample(s) with a value of '%s' is allowed in column \ + '%s' when rows grouped by '%s'" +VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either \ + is present, check body.validate." 
+ def uuid4_chk(uuid_str): """Tests validity of uuid""" @@ -26,6 +29,7 @@ def uuid4_chk(uuid_str): return False return val.hex == uuid_str.replace('-', '') + def normalise(args): """ Takes the arguments captured by the normalise_manifest.py executable @@ -35,15 +39,18 @@ def normalise(args): # Extensions are checked by argparse if args.input.endswith('tsv') is True: if args.output is None: - print("\nINFO: input and output will be same file, no action required\n", file=sys.stderr) + print("\nINFO: input and output will be same file, no action \ + required\n", file=sys.stderr) return True else: if os.path.exists(args.output): if os.path.samefile(args.input, args.output): - print("\nINFO: input and output point to the same file, no action required", file=sys.stderr) + print("\nINFO: input and output point to the same file, no \ + action required", file=sys.stderr) return True # anything else is a copy - print("\nINFO: input copied to output, no format conversion required.", file=sys.stderr) + print("\nINFO: input copied to output, no format conversion \ + required.", file=sys.stderr) shutil.copyfile(args.input, args.output, follow_symlinks=True) return True @@ -53,10 +60,11 @@ def normalise(args): manifest = Manifest(args.input) manifest.convert_by_extn(args.output) + def evaulate_value_limits(field, chk, limit_chks): """ - Handles validation of fields where presence of partiular value has a max occurence - within a grouping of rows + Handles validation of fields where presence of partiular value has a max + occurence within a grouping of rows """ for val_limit in chk: if 'limit' not in val_limit: @@ -73,11 +81,15 @@ def evaulate_value_limits(field, chk, limit_chks): field, val_limit['limit_by'])) + class Manifest(object): """ Top level object used to validate a manifest TSV file. - This runs validation of the header and body in turn rasing execptions as appropriate. - Configuration is handled via the json files found in the config sub directory. + This runs validation of the header and body in turn rasing execptions as + appropriate. + + Configuration is handled via the json files found in the config sub + directory. """ def __init__(self, infile): self.infile = infile @@ -102,7 +114,10 @@ def _csv_to_tsv(self, ofh): def _excel_to_tsv(self, ofh): xlrd = import_module('xlrd') - book = xlrd.open_workbook(self.infile, formatting_info=False, on_demand=True, ragged_rows=True) + book = xlrd.open_workbook(self.infile, + formatting_info=False, + on_demand=True, + ragged_rows=True) sheet = book.sheet_by_name('For entry') for r in range(0, sheet.nrows): simplerow = [] @@ -115,16 +130,16 @@ def _excel_to_tsv(self, ofh): print("\t".join(simplerow), file=ofh) def convert_by_extn(self, outfile): - """Uses the input file extension to determine the correct file conversion - routine. Output is always tsv file. Expects the output file name extension - to have been checked in advance. + """ + Uses the input file extension to determine the correct file conversion + routine. Output is always tsv file. Expects the output file name + extension to have been checked in advance. 
""" with open(outfile, 'w') as ofh: convertor = getattr(self, '_' + self.informat + '_to_tsv') convertor(ofh) - def validate(self, checkFiles=False): """ Runs the actual validation of a manifest: @@ -135,7 +150,8 @@ def validate(self, checkFiles=False): - Validate body """ if self.informat != 'tsv': - raise ValueError('Manifest.validate only accepts files of type "tsv"') + raise ValueError('Manifest.validate only accepts files of type \ + "tsv"') # Generate the header object self.header = Header(self.infile) self.config = self.header.get_config() @@ -176,9 +192,11 @@ def write(self, outdir): def get_uuid(self): """Get the uuid for this manifest""" if not self.header: - raise ValidationError('manifest.validate() must be called before manifest.get_uuid()') + raise ValidationError('manifest.validate() must be called before \ + manifest.get_uuid()') return self.header.uuid + class Header(object): """ Object to load and validate the header section of a manifest @@ -223,7 +241,9 @@ def get_config(self, cfg_file=None): config = None if cfg_file is None: resource = 'config/%s-%s.json' % (self.type, self.version) - resource_as_string = resource_string(__name__, resource).decode("utf-8", "strict") + resource_as_string = resource_string(__name__, + resource).decode("utf-8", + "strict") config = json.loads(resource_as_string) # for error messages cfg_file = resource_filename(__name__, resource) @@ -233,11 +253,11 @@ def get_config(self, cfg_file=None): config = json.load(j) if config['type'] != self.type: - raise ParsingError("Filename (%s) does not match 'type' (%s) within file" - % (cfg_file, config['type'])) + raise ParsingError("Filename (%s) does not match 'type' (%s) \ + within file" % (cfg_file, config['type'])) if config['version'] != self.version: - raise ParsingError("Filename (%s) does not match 'version' (%s) within file" - % (cfg_file, config['version'])) + raise ParsingError("Filename (%s) does not match 'version' (%s) \ + within file" % (cfg_file, config['version'])) self.validate_json(config) return config @@ -253,37 +273,40 @@ def validate_json(self, config): - body content validated by it's own class. 
""" if 'header' not in config: - raise ConfigError("header (dict/hash) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header (dict/hash) not found in json file: \ + %s-%s.json" % (self.type, self.version)) if 'expected' not in config['header']: - raise ConfigError("header.expected (list/array) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header.expected (list/array) not found in json \ + file: %s-%s.json" % (self.type, self.version)) if 'required' not in config['header']: - raise ConfigError("header.required (list/array) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header.required (list/array) not found in json \ + file: %s-%s.json" % (self.type, self.version)) if 'validate' not in config['header']: - raise ConfigError("header.validate (dict/hash) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("header.validate (dict/hash) not found in json \ + file: %s-%s.json" % (self.type, self.version)) if 'body' not in config: - raise ConfigError("body (dict/hash) not found in json file: %s-%s.json" - % (self.type, self.version)) + raise ConfigError("body (dict/hash) not found in json file: \ + %s-%s.json" % (self.type, self.version)) + def fields_exist(self, expected): """ - Checks all field that are expected to exist in the header of this type+version - of the manifest. It is not checking for values, just the expected elements. - These are detailed in the header.expected element of the json file. - Adds these to the 'items' dict of the header object. + Checks all field that are expected to exist in the header of this + type+version of the manifest. It is not checking for values, just the + expected elements. These are detailed in the header.expected element of + the json file. Adds these to the 'items' dict of the header object. """ found = set(self._all_items.keys()) expected_fields = set(expected) unexpected = found.difference(expected_fields) - if unexpected: # empty sequences are false, don't use "len() > 0" - raise ValidationError("The following unexpected fields were found in the header of your file:\n\t'" + if unexpected: + raise ValidationError("The following unexpected fields were found \ + in the header of your file:\n\t'" + "'\n\t'".join(unexpected) + "'") missing_fields = expected_fields.difference(found) if missing_fields: - raise ValidationError("The following expected fields were missing from the header of your file:\n\t'" + raise ValidationError("The following expected fields were missing \ + from the header of your file:\n\t'" + "'\n\t'".join(missing_fields) + "'") # add the elements to the approved header items dict for key, val in self._all_items.items(): @@ -291,22 +314,26 @@ def fields_exist(self, expected): continue self.items[key] = val + def fields_have_values(self, required): """ Check all fields that should have values do for this type+version. These are detailed in the header.required element of the json file. """ for item in required: - if not self.items[item]: # empty sequences are false, don't use "len() == 0" + if not self.items[item]: raise ValidationError("Header item '%s' has no value." % (item)) + def field_values_valid(self, validate): """ Checks all restricted fields have valid values for this type+version. 
""" for item in validate: if self.items[item] not in validate[item]: - raise ValidationError("Header item '%s' has an invalid value of: %s" % (item, self.items[item])) + raise ValidationError("Header item '%s' has an invalid value \ + of: %s" % (item, self.items[item])) + def validate(self, rules): """ @@ -326,9 +353,11 @@ def validate(self, rules): else: uuid_found = self.items['Our Ref:'] if not uuid4_chk(uuid_found): - raise ValidationError("Value found at 'Our Ref' is not a valid uuid4: "+uuid_found) + raise ValidationError("Value found at 'Our Ref' is not a valid \ + uuid4: "+uuid_found) self.uuid = uuid_found + class Body(object): """ Body object validates the individual records of a manifest. @@ -337,7 +366,7 @@ class Body(object): """ def __init__(self, manifest, config): self.manifest = manifest - self.offset = 1 # start at one as would need to increment for header line otherwise + self.offset = 1 # start at one otherwise need to increment for header manifest_dir = os.path.dirname(manifest) csv = import_module('csv') self.file_detail = [] @@ -352,7 +381,10 @@ def __init__(self, manifest, config): if not loadRows: self.offset += 1 continue - self.file_detail.append(FileMeta(self.headings, row, manifest_dir)) + self.file_detail.append(FileMeta(self.headings, + row, + manifest_dir)) + def write(self, fp, config): """ @@ -372,6 +404,7 @@ def write(self, fp, config): print("\t".join(row), file=fp) return for_json + def validate(self, rules): """ Runs the different elements of body validation: @@ -383,12 +416,13 @@ def validate(self, rules): self.uniq_files() self.file_ext_check(rules['validate_ext']) + def field_values_valid(self, validate): """ Check fields with restriced dict are valid Must run after self.fields_have_values() - If 'limit' and 'limit_by' are defined will create a counter for each of these entities - and error if 'limit' exceeded + If 'limit' and 'limit_by' are defined will create a counter for each of + these entities and error if 'limit' exceeded """ for field, chk in validate.items(): cnt = self.offset @@ -397,7 +431,8 @@ def field_values_valid(self, validate): cnt += 1 # checks all values are valid if fd.attributes[field] not in [d['value'] for d in chk]: - raise ValidationError("Metadata item '%s' has an invalid value of '%s' on line %d" + raise ValidationError("Metadata item '%s' has an invalid \ + value of '%s' on line %d" % (field, fd.attributes[field], cnt)) # Construct value occurence limiting counts for val_limit in chk: @@ -419,6 +454,7 @@ def field_values_valid(self, validate): limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1 evaulate_value_limits(field, chk, limit_chks) + def fields_have_values(self, rules): """ Check the fields listed as required are populated @@ -428,9 +464,11 @@ def fields_have_values(self, rules): cnt += 1 for req in rules: if (not fd.attributes[req]) or fd.attributes[req] == '.': - raise ValidationError("Required metadata value absent for '%s' on line %d ('.' not acceptable)" + raise ValidationError("Required metadata value absent for \ + '%s' on line %d ('.' 
not acceptable)" % (req, cnt)) + def uniq_files(self): """ Check all filenames are uniq within this manifest @@ -444,10 +482,12 @@ def uniq_files(self): if item == '.': continue if item in all_files: - raise ValidationError("Metadata item '%s' has a duplicate value of '%s' on line %d" + raise ValidationError("Metadata item '%s' has a duplicate \ + value of '%s' on line %d" % (f_type, item, cnt)) all_files.append(item) + def file_ext_check(self, rules): """ Check all files have valid extentions @@ -469,14 +509,17 @@ def file_ext_check(self, rules): full_ext = ext + extra if full_ext not in rules[f_type]: - raise ValidationError("File extension of '%s' is not valid, '%s' on line %d" + raise ValidationError("File extension of '%s' is not valid, \ + '%s' on line %d" % (full_ext, f_type, cnt)) if last_ext is not None and last_ext != full_ext: - raise ValidationError("File extensions for same row must match, '%s' vs '%s' on line %d" + raise ValidationError("File extensions for same row must \ + match, '%s' vs '%s' on line %d" % (last_ext, full_ext, cnt)) last_ext = full_ext + def heading_check(self, config): """ Simple check for correct, ordered headings for file rows. @@ -488,6 +531,7 @@ def heading_check(self, config): + "\nbut got\n\t" + ', '.join(self.headings)) + def file_tests(self): """ Test for file existance and content diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 3074378..88d8cb2 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -16,6 +16,7 @@ prog_records = 100000 + class SeqValidator(object): """ Validate sequence file, currently only does fastq (interleaved or paired) @@ -32,7 +33,7 @@ def __init__(self, file_a, file_b=None, progress_pairs=prog_records): self.file_b = file_b self.pairs = 0 # will use this to decide on path - self.is_gzip = False # change open method for fastq + self.is_gzip = False # change open method for fastq # sam is not supported # only the min value is actually needed to determine if scaling @@ -62,7 +63,7 @@ def _prep(self): full_ext = ext + full_ext if self.file_b is None: - self.file_b = self.file_a # use equality to indicate interleaved + self.file_b = self.file_a # use equality to indicate interleaved elif not self.file_b.endswith(full_ext): raise SeqValidationError("Input files be of same type") @@ -87,8 +88,7 @@ def report(self, fp): """ report = {'pairs': self.pairs, 'valid_q': self.q_min == 33, - 'interleaved': self.file_a == self.file_b - } + 'interleaved': self.file_a == self.file_b} json.dump(report, fp, sort_keys=True, indent=4) def validate_paired(self): diff --git a/setup.py b/setup.py index f24100a..fc2874b 100755 --- a/setup.py +++ b/setup.py @@ -12,10 +12,11 @@ 'version': '1.1.0', 'python_requires': '>= 3.3', 'setup_requires': ['nose>=1.0'], - 'install_requires': ['progressbar2','xlrd'], + 'install_requires': ['progressbar2', 'xlrd'], 'packages': ['cgp_seq_input_val'], 'package_data': {'cgp_seq_input_val': ['config/*.json']}, - 'scripts': ['bin/normalise_manifest.py', 'bin/validate_manifest.py', 'bin/validate_seq_file.py'] + 'scripts': ['bin/normalise_manifest.py', 'bin/validate_manifest.py', + 'bin/validate_seq_file.py'] } setup(**config) From 866206257de489c698b43fdb0dc098fbfb6becdc Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:08:52 +0100 Subject: [PATCH 09/37] Fix line length --- .codeclimate.yml | 1 - tox.ini | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tox.ini diff --git a/.codeclimate.yml 
b/.codeclimate.yml index 70efdda..91aab99 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -11,7 +11,6 @@ engines: enabled: true pep8: enabled: true - max-line-length: 120 radon: enabled: true diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..4879cb6 --- /dev/null +++ b/tox.ini @@ -0,0 +1,2 @@ +[pep8] +max-line-length = 160 From d9ffaa01149c4e8420964dd7ed484c4e7ff770db Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:16:42 +0100 Subject: [PATCH 10/37] More style correctionse --- bin/normalise_manifest.py | 2 +- bin/validate_manifest.py | 5 +++-- bin/validate_seq_file.py | 2 +- cgp_seq_input_val/file_meta.py | 1 + cgp_seq_input_val/manifest.py | 31 +++++++++--------------------- cgp_seq_input_val/seq_validator.py | 7 +++---- 6 files changed, 18 insertions(+), 30 deletions(-) diff --git a/bin/normalise_manifest.py b/bin/normalise_manifest.py index d232cf1..9033fca 100755 --- a/bin/normalise_manifest.py +++ b/bin/normalise_manifest.py @@ -12,7 +12,7 @@ version = pkg_resources.require("cgp_seq_input_val")[0].version -## read variables, auto help text +# read variables, auto help text parser = argparse.ArgumentParser(description='Convert manifest files to common denominator (tsv)', epilog='Input can be [xls|xlsx|csv|tsv]. "tsv" is just copied to maintain tool-chain') parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) diff --git a/bin/validate_manifest.py b/bin/validate_manifest.py index 85f40c8..a753d58 100755 --- a/bin/validate_manifest.py +++ b/bin/validate_manifest.py @@ -14,7 +14,7 @@ version = pkg_resources.require("cgp_seq_input_val")[0].version -## read variables, auto help text +# read variables, auto help text parser = argparse.ArgumentParser(description='Validate a tsv import manifest file') parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) parser.add_argument('-i', '--input', dest='input', metavar='FILE', @@ -30,7 +30,8 @@ try: manifest = Manifest(args.input) manifest.validate() - (tsv_file, json_file) = manifest.write(args.output) # output new manifest in tsv and json. + # output new manifest in tsv and json. + (tsv_file, json_file) = manifest.write(args.output) print("Created files:\n\t%s\n\t%s" % (tsv_file, json_file)) except ValidationError as ve: print("ERROR: " + str(ve), file=sys.stderr) diff --git a/bin/validate_seq_file.py b/bin/validate_seq_file.py index f8b8964..65924b5 100755 --- a/bin/validate_seq_file.py +++ b/bin/validate_seq_file.py @@ -35,7 +35,7 @@ validator = SeqValidator(args.input[0], file_2) validator.validate() validator.report(args.report) -except SeqValidationError as ve: # runtime so no functions for message and errno +except SeqValidationError as ve: # runtime so no functions for message and errno print("ERROR: " + str(ve), file=sys.stderr) exit(1) # have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError diff --git a/cgp_seq_input_val/file_meta.py b/cgp_seq_input_val/file_meta.py index bf8a7b2..d9456e9 100644 --- a/cgp_seq_input_val/file_meta.py +++ b/cgp_seq_input_val/file_meta.py @@ -53,6 +53,7 @@ def test_files(self, line): raise FileValidationError("'%s' is an empty file ('%s' - line %d)." % (item, f_type, line)) + class FileValidationError(RuntimeError): """ Exception for failures to validate data in the manifest. 
diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py index 369ea3b..263974f 100644 --- a/cgp_seq_input_val/manifest.py +++ b/cgp_seq_input_val/manifest.py @@ -139,7 +139,6 @@ def convert_by_extn(self, outfile): convertor = getattr(self, '_' + self.informat + '_to_tsv') convertor(ofh) - def validate(self, checkFiles=False): """ Runs the actual validation of a manifest: @@ -288,7 +287,6 @@ def validate_json(self, config): raise ConfigError("body (dict/hash) not found in json file: \ %s-%s.json" % (self.type, self.version)) - def fields_exist(self, expected): """ Checks all field that are expected to exist in the header of this @@ -300,21 +298,20 @@ def fields_exist(self, expected): expected_fields = set(expected) unexpected = found.difference(expected_fields) if unexpected: + joined_vars = "'\n\t'".join(unexpected) raise ValidationError("The following unexpected fields were found \ - in the header of your file:\n\t'" - + "'\n\t'".join(unexpected) + "'") + in the header of your file:\n\t'" + joined_vars + "'") missing_fields = expected_fields.difference(found) if missing_fields: + joined_vars = "'\n\t'".join(missing_fields) raise ValidationError("The following expected fields were missing \ - from the header of your file:\n\t'" - + "'\n\t'".join(missing_fields) + "'") + from the header of your file:\n\t'" + joined_vars + "'") # add the elements to the approved header items dict for key, val in self._all_items.items(): if key in ('Form type:', 'Form version:'): continue self.items[key] = val - def fields_have_values(self, required): """ Check all fields that should have values do for this type+version. @@ -324,7 +321,6 @@ def fields_have_values(self, required): if not self.items[item]: raise ValidationError("Header item '%s' has no value." % (item)) - def field_values_valid(self, validate): """ Checks all restricted fields have valid values for this type+version. @@ -334,7 +330,6 @@ def field_values_valid(self, validate): raise ValidationError("Header item '%s' has an invalid value \ of: %s" % (item, self.items[item])) - def validate(self, rules): """ Runs the different elements of header validation: @@ -366,7 +361,7 @@ class Body(object): """ def __init__(self, manifest, config): self.manifest = manifest - self.offset = 1 # start at one otherwise need to increment for header + self.offset = 1 # start at one otherwise need to increment for header manifest_dir = os.path.dirname(manifest) csv = import_module('csv') self.file_detail = [] @@ -385,7 +380,6 @@ def __init__(self, manifest, config): row, manifest_dir)) - def write(self, fp, config): """ Writes the body to a file-pointer in tsv and returns the values @@ -404,7 +398,6 @@ def write(self, fp, config): print("\t".join(row), file=fp) return for_json - def validate(self, rules): """ Runs the different elements of body validation: @@ -416,7 +409,6 @@ def validate(self, rules): self.uniq_files() self.file_ext_check(rules['validate_ext']) - def field_values_valid(self, validate): """ Check fields with restriced dict are valid @@ -454,7 +446,6 @@ def field_values_valid(self, validate): limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1 evaulate_value_limits(field, chk, limit_chks) - def fields_have_values(self, rules): """ Check the fields listed as required are populated @@ -468,7 +459,6 @@ def fields_have_values(self, rules): '%s' on line %d ('.' 
not acceptable)" % (req, cnt)) - def uniq_files(self): """ Check all filenames are uniq within this manifest @@ -487,7 +477,6 @@ def uniq_files(self): % (f_type, item, cnt)) all_files.append(item) - def file_ext_check(self, rules): """ Check all files have valid extentions @@ -519,18 +508,16 @@ def file_ext_check(self, rules): % (last_ext, full_ext, cnt)) last_ext = full_ext - def heading_check(self, config): """ Simple check for correct, ordered headings for file rows. Here to minimise complexity of init """ if self.headings != config['ordered']: - raise ValidationError("Expected row headings of\n\t" - + ', '.join(config['ordered']) - + "\nbut got\n\t" - + ', '.join(self.headings)) - + raise ValidationError("Expected row headings of\n\t" + + ', '.join(config['ordered']) + + "\nbut got\n\t" + + ', '.join(self.headings)) def file_tests(self): """ diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 88d8cb2..84c479e 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -137,12 +137,12 @@ def validate_paired(self): if curr_line_a == '': if curr_line_b != '': raise SeqValidationError("Read 1 file finished before read 2") - break # if we get here both files are finished + break # if we get here both files are finished if curr_line_b == '': raise SeqValidationError("Read 2 file finished before read 1") self.pairs = pairs finally: - print(file=sys.stderr) # make sure we move to next line when progress finishes + print(file=sys.stderr) # make sure we move to next line when progress finishes if fq_fh_a is not None and not fq_fh_a.closed: fq_fh_a.close() if fq_fh_b is not None and not fq_fh_b.closed: @@ -212,8 +212,7 @@ def check_pair(self, read_1, read_2): \n\t%s (%s)\n\t%s (%s)" % (read_1.file_pos[0], read_2.file_pos[0], read_1.name, self.file_a, - read_2.name, self.file_b) - ) + read_2.name, self.file_b)) if read_1.end != '1': raise SeqValidationError("Fastq record at line %d of %s should be for first in pair, got '%s'" % (read_1.file_pos[0], self.file_a, read_1.end)) From 53ad6e03d8f5e7ef578069a67fd634838c97a121 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:18:11 +0100 Subject: [PATCH 11/37] Last style error --- cgp_seq_input_val/seq_validator.py | 2 +- tox.ini | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 84c479e..9d0b62a 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -189,7 +189,7 @@ def validate_interleaved(self): break self.pairs = pairs finally: - print(file=sys.stderr) # make sure we move to next line when progress finishes + print(file=sys.stderr) # make sure we move to next line when progress finishes if fq_fh is not None and not fq_fh.closed: fq_fh.close() diff --git a/tox.ini b/tox.ini index 4879cb6..d8b4df9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,2 +1,5 @@ [pep8] max-line-length = 160 + +[pycodestyle] +max-line-length = 160 From 7d4cb05458640cc86791481a84e78217642a5166 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 27 Sep 2017 12:28:40 +0100 Subject: [PATCH 12/37] Should add code coverage push to codeclimae as part of travis build --- .travis.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.travis.yml b/.travis.yml index dac145b..82d3d7d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,12 @@ notifications: slack: wtsi-cgpit:ptUMR1tkNyZJYd9TpGoss8WR email: false +env: + global: + - 
CC_TEST_REPORTER_ID=24af7ff1e88f3b70c8b9a5280ce9604d561dacd8eaa7b1d895128ca2bd724beb
+
 language: python
+
 python:
   - "3.6"
 
@@ -11,5 +16,14 @@ install:
   - pip install progressbar2
   - pip install xlrd
 
+
+before_script:
+  - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
+  - chmod +x ./cc-test-reporter
+  - ./cc-test-reporter before-build
+
 script:
   - nosetests --with-coverage --cover-erase --cover-package=cgp_seq_input_val
+
+after_script:
+  - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT

From 0878d7724e7adf10c9430aea64ec03f670549229 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 12:37:50 +0100
Subject: [PATCH 13/37] don't delete the coverage file

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 82d3d7d..4309ccc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,7 +23,7 @@ before_script:
   - ./cc-test-reporter before-build
 
 script:
-  - nosetests --with-coverage --cover-erase --cover-package=cgp_seq_input_val
+  - nosetests --with-coverage --cover-package=cgp_seq_input_val

From 9b50e80fdd3b12c091039d7afd0fd67a7b08ee7a Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 12:47:20 +0100
Subject: [PATCH 14/37] convert raw coverage output to compatible form

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 4309ccc..1989da4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,6 +24,7 @@ before_script:
 
 script:
   - nosetests --with-coverage --cover-package=cgp_seq_input_val
+  - coverage xml
 
 after_script:
   - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT

From 32acd50a21d751a8940f5b09018c4c12166f251d Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 14:42:54 +0100
Subject: [PATCH 15/37] #Fix 3

---
 .codeclimate.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 91aab99..11857ae 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -23,3 +23,4 @@ exclude_paths:
   - "git-hooks/"
   - "data/"
   - "tests/"
+  - "pylintrc"

From 9d60702b9bab468fa817cd71546919dcf643e399 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Wed, 27 Sep 2017 14:43:20 +0100
Subject: [PATCH 16/37] raise mass threshold

---
 .codeclimate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 11857ae..7207567 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -3,8 +3,8 @@ engines:
     enabled: true
     config:
       languages:
-      - python
-      #mass_threshold: 30
+        python:
+          mass_threshold: 35
   fixme:
     enabled: true

From 5d14661ab828d46286d5edda310d63f4b19da246 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Fri, 29 Sep 2017 10:32:43 +0100
Subject: [PATCH 17/37] More local checks and changes to codeclimate config

---
 .codeclimate.yml | 3 +++
 run_tests.sh | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 7207567..06b5b21 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -4,6 +4,7 @@ engines:
     config:
       languages:
         python:
+          python_version: 3
          mass_threshold: 35
   fixme:
     enabled: true
@@ -13,6 +14,8 @@ engines:
     enabled: true
   radon:
     enabled: true
+    config:
+      threshold: "C"
 
 ratings:
   paths:
diff --git a/run_tests.sh b/run_tests.sh
index 4cf70d2..49a7eec 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -6,7 +6,17 @@ if [ "$code" != "0" ]; then
   exit $code
 fi
 
-echo -e "\n#################\n# Running pylint:\n"
+# these should not die:
+
+echo -e "\n###################################"
+echo "# Running radon (code complexity) #"
+echo "###################################"
+env/bin/radon cc -nc bin cgp_seq_input_val
+
+echo -e "\n##########################"
+echo "# Running pylint (style) #"
+echo "##########################"
 env/bin/pylint --output-format=colorized bin/*.py cgp_seq_input_val
-echo -e "#\n#################"
-exit 0 # don't die based on pylint
+
+
+exit 0 # don't die based on assements of code quality

From 7bc7af5edf09a3cb74d397089827b3f602d5c97c Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Mon, 9 Oct 2017 16:35:20 +0100
Subject: [PATCH 18/37] Add missing batch of tests and mark some code as not possible to cover (with reason)

---
 cgp_seq_input_val/cliutil.py | 10 ++++++++--
 tests/cgp_seq_input_val_tests_cliutil.py | 22 ++++++++++++++++++++++
 tests/data/cliutil/good.csv | 0
 tests/data/cliutil/good.tsv | 0
 tests/data/cliutil/good.xls | 0
 tests/data/cliutil/good.xlsx | 0
 6 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 tests/cgp_seq_input_val_tests_cliutil.py
 create mode 100644 tests/data/cliutil/good.csv
 create mode 100644 tests/data/cliutil/good.tsv
 create mode 100644 tests/data/cliutil/good.xls
 create mode 100644 tests/data/cliutil/good.xlsx

diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py
index f82f1d7..a69fa28 100644
--- a/cgp_seq_input_val/cliutil.py
+++ b/cgp_seq_input_val/cliutil.py
@@ -13,8 +13,14 @@ def extn_check(parser, choices, fname, readable=False):
     try:
         handle = open(fname, 'r')
         handle.close()
-    except FileNotFoundError as error:
+    except FileNotFoundError as error:  # pragma: no cover
         parser.error(error)
-    if extn not in choices:
+    if extn not in choices:  # pragma: no cover
         parser.error("File doesn't end with {}".format(choices))
     return fname
+
+"""
+Why 'pragma: no cover'
+to cover parser errors in test cases you have to add a fair amount of additional
+code, as we know that raising an error this way is robust consider this covered.
+""" diff --git a/tests/cgp_seq_input_val_tests_cliutil.py b/tests/cgp_seq_input_val_tests_cliutil.py new file mode 100644 index 0000000..d317eb4 --- /dev/null +++ b/tests/cgp_seq_input_val_tests_cliutil.py @@ -0,0 +1,22 @@ +from nose.tools import * +import os, sys, tempfile +import glob +#from argparse import Namespace + +from cgp_seq_input_val.cliutil import extn_check +from cgp_seq_input_val import constants + +import argparse + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cliutil') + +def setup(): + pass + +def teardown(): + pass + +def test_extn_check_good(): + parser = argparse.ArgumentParser() + for f in glob.glob(os.path.join(test_dir, 'good.*')): + extn_check(parser, constants.MANIFEST_EXTNS, f, readable=True) diff --git a/tests/data/cliutil/good.csv b/tests/data/cliutil/good.csv new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/cliutil/good.tsv b/tests/data/cliutil/good.tsv new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/cliutil/good.xls b/tests/data/cliutil/good.xls new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/cliutil/good.xlsx b/tests/data/cliutil/good.xlsx new file mode 100644 index 0000000..e69de29 From dad0ff5449988f602df3c609d7f00df19484cfde Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 9 Oct 2017 16:37:36 +0100 Subject: [PATCH 19/37] Doc use of radon and fix comment --- README.md | 1 + cgp_seq_input_val/cliutil.py | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 10b3375..8532e2d 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ For testing/coverage (`./run_tests.sh`) ``` env/bin/pip install nose +env/bin/pip install radon env/bin/pip install coverage env/bin/pip install pylint ``` diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py index a69fa28..e417a66 100644 --- a/cgp_seq_input_val/cliutil.py +++ b/cgp_seq_input_val/cliutil.py @@ -14,13 +14,9 @@ def extn_check(parser, choices, fname, readable=False): handle = open(fname, 'r') handle.close() except FileNotFoundError as error: # pragma: no cover + # can't cover these easily parser.error(error) if extn not in choices: # pragma: no cover + # can't cover these easily parser.error("File doesn't end with {}".format(choices)) return fname - -""" -Why 'pragma: no cover' -to cover parser errors in test cases you have to add a fair amount of additional -code, as we know that raising an error this way is robust consider this covered. 
-""" From a0e897103a59d17e86e24b0892215032d3e90fdc Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 10 Oct 2017 09:03:51 +0100 Subject: [PATCH 20/37] Change mass required for duplication check, as picking up exception raising --- .codeclimate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codeclimate.yml b/.codeclimate.yml index 06b5b21..656e321 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -5,7 +5,7 @@ engines: languages: python: python_version: 3 - mass_threshold: 35 + mass_threshold: 40 fixme: enabled: true markdownlint: From 2de1e4e80e947440ca93df6c42913ccbb59691be Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 10 Oct 2017 09:24:41 +0100 Subject: [PATCH 21/37] Cleanup lint and push duplication mass higher, may turn it off --- .codeclimate.yml | 2 +- cgp_seq_input_val/cliutil.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.codeclimate.yml b/.codeclimate.yml index 656e321..9fdff4c 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -5,7 +5,7 @@ engines: languages: python: python_version: 3 - mass_threshold: 40 + mass_threshold: 50 fixme: enabled: true markdownlint: diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py index e417a66..3b94747 100644 --- a/cgp_seq_input_val/cliutil.py +++ b/cgp_seq_input_val/cliutil.py @@ -13,10 +13,10 @@ def extn_check(parser, choices, fname, readable=False): try: handle = open(fname, 'r') handle.close() - except FileNotFoundError as error: # pragma: no cover + except FileNotFoundError as error: # pragma: no cover # can't cover these easily parser.error(error) - if extn not in choices: # pragma: no cover + if extn not in choices: # pragma: no cover # can't cover these easily parser.error("File doesn't end with {}".format(choices)) return fname From 93ec7df1c1ffd02cf8a16db64180fa0e45ff2f37 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 10 Oct 2017 13:50:02 +0100 Subject: [PATCH 22/37] More ways to keep on top of code --- run_tests.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 49a7eec..6f15bec 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,22 +1,23 @@ #!/usr/bin/env bash -env/bin/nosetests --with-coverage --cover-erase --cover-html --cover-package=cgp_seq_input_val -code=$? 
- -if [ "$code" != "0" ]; then - exit $code -fi +set -e +env/bin/nosetests --with-coverage --cover-erase --cover-html --cover-min-percentage=50 --cover-package=cgp_seq_input_val +set +e # these should not die: -echo -e "\n###################################" -echo "# Running radon (code complexity) #" -echo "###################################" -env/bin/radon cc -nc bin cgp_seq_input_val - echo -e "\n##########################" echo "# Running pylint (style) #" echo "##########################" env/bin/pylint --output-format=colorized bin/*.py cgp_seq_input_val +echo -e "\n#########################################" +echo "# Running radon (cyclomatic complexity) #" +echo "#########################################" +env/bin/radon cc -nc bin cgp_seq_input_val + +echo -e "\n#########################################" +echo "# Running radon (maintainability index) #" +echo "#########################################" +env/bin/radon mi -s -n B bin cgp_seq_input_val exit 0 # don't die based on assements of code quality From c2b91a480dafe7140bbb562c9dfc700874bd231c Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 11:06:29 +0100 Subject: [PATCH 23/37] Reworked to use pytest and entry-points to do away with physical scripts --- .gitignore | 3 +- README.md | 34 ++- bin/normalise_manifest.py | 27 --- bin/validate_manifest.py | 38 --- bin/validate_seq_file.py | 49 ---- cgp_seq_input_val/cliutil.py | 4 +- cgp_seq_input_val/command_line.py | 60 +++++ cgp_seq_input_val/manifest.py | 12 + cgp_seq_input_val/seq_validator.py | 19 ++ run_tests.sh | 8 +- setup.py | 7 +- tests/cgp_seq_input_val_tests_cliutil.py | 22 -- tests/cgp_seq_input_val_tests_fastq_read.py | 47 ---- tests/cgp_seq_input_val_tests_manifest.py | 228 ------------------ .../cgp_seq_input_val_tests_seq_validator.py | 83 ------- tests/data/cliutil/bad.extn | 0 tests/test_cgp_seq_input_val_cliutil.py | 32 +++ tests/test_cgp_seq_input_val_fastq_read.py | 51 ++++ ...py => test_cgp_seq_input_val_file_meta.py} | 28 +-- tests/test_cgp_seq_input_val_manifest.py | 228 ++++++++++++++++++ ...py => test_cgp_seq_input_val_normalise.py} | 5 +- tests/test_cgp_seq_input_val_seq_validator.py | 83 +++++++ 22 files changed, 534 insertions(+), 534 deletions(-) delete mode 100755 bin/normalise_manifest.py delete mode 100755 bin/validate_manifest.py delete mode 100755 bin/validate_seq_file.py create mode 100644 cgp_seq_input_val/command_line.py delete mode 100644 tests/cgp_seq_input_val_tests_cliutil.py delete mode 100644 tests/cgp_seq_input_val_tests_fastq_read.py delete mode 100644 tests/cgp_seq_input_val_tests_manifest.py delete mode 100644 tests/cgp_seq_input_val_tests_seq_validator.py create mode 100644 tests/data/cliutil/bad.extn create mode 100644 tests/test_cgp_seq_input_val_cliutil.py create mode 100644 tests/test_cgp_seq_input_val_fastq_read.py rename tests/{cgp_seq_input_val_tests_file_meta.py => test_cgp_seq_input_val_file_meta.py} (55%) create mode 100644 tests/test_cgp_seq_input_val_manifest.py rename tests/{cgp_seq_input_val_tests_normalise.py => test_cgp_seq_input_val_normalise.py} (96%) create mode 100644 tests/test_cgp_seq_input_val_seq_validator.py diff --git a/.gitignore b/.gitignore index d100e53..97b2970 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,8 @@ __pycache__/ *.py[cod] *$py.class .coverage -cover/ +.cache +htmlcov/ /.eggs /build /dist diff --git a/README.md b/README.md index 8532e2d..d3d1257 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,9 @@ the driver code. 
## Tools -### normalise_manifest.py +`cgpSeqInputVal` has multiple sub commands, listed with `cgpSeqInputVal --help`. + +### cgpSeqInputVal man-norm Takes input in multiple types and converts to tsv. If intput is tsv just copied the file to the output location (to simplify usage in workflows). Valid input types @@ -24,7 +26,7 @@ include: Absolutely no validation is carried out here. -### validate_manifest.py +### cgpSeqInputVal man-valid Takes the `tsv` representation of a manifest and performs validation of the structure and data values. The checks applied are managed by the `cgp_seq_input_val/config/*.json` @@ -37,7 +39,7 @@ The output is a lightly modified version of the input, adding: And a `json` version of the file ready for use by downstream systems. -### validate_seq_file.py +### cgpSeqInputVal seq-valid Takes an interleaved or a pair of paired-fastq files and produces a simple report of: @@ -108,17 +110,20 @@ in the base of the clone: cd $PROJECTROOT hash virtualenv || pip3 install virtualenv virtualenv -p python3 env -env/bin/pip install progressbar2 -env/bin/pip install xlrd +source env/bin/activate +pip install progressbar2 +pip install xlrd +python setup.py develop # so bin scripts can find module ``` For testing/coverage (`./run_tests.sh`) ``` -env/bin/pip install nose -env/bin/pip install radon -env/bin/pip install coverage -env/bin/pip install pylint +source env/bin/activate # if not already in env +pip install pytest +pip install pytest-cov +pip install pylint +pip install radon ``` __Also see__ [Package Dependancies](#package-dependancies) @@ -127,10 +132,13 @@ __Also see__ [Package Dependancies](#package-dependancies) __Make sure the version is incremented__ in `./setup.py` -The release is handled by setuptools: +The release is handled by wheel: ```bash -$ ./setup.py bdist_egg -# this creates an egg which can be copied to a deployment location, e.g. -scp dist/cgp_seq_input_val-0.1.0-py3.6.egg user@host:~/ +$ source env/bin/activate # if not already +$ python setup.py bdist_wheel -d dist +# this creates an wheel archive which can be copied to a deployment location, e.g. +$ scp cgp_seq_input_val-1.1.0-py3-none-any.whl user@host:~/wheels +# on host +$ pip install --find-links=~/wheels cgp_seq_input_val ``` diff --git a/bin/normalise_manifest.py b/bin/normalise_manifest.py deleted file mode 100755 index 9033fca..0000000 --- a/bin/normalise_manifest.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 - -"""Converts manifests from various types to TSV""" - -# python builtin -import argparse -import pkg_resources # part of setuptools - -# this project -from cgp_seq_input_val import constants, cliutil -from cgp_seq_input_val.manifest import normalise - -version = pkg_resources.require("cgp_seq_input_val")[0].version - -# read variables, auto help text -parser = argparse.ArgumentParser(description='Convert manifest files to common denominator (tsv)', - epilog='Input can be [xls|xlsx|csv|tsv]. "tsv" is just copied to maintain tool-chain') -parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) -parser.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in friendly formats', required=True, - type=lambda s: cliutil.extn_check(parser, constants.MANIFEST_EXTNS, s, readable=True)) -parser.add_argument('-o', '--output', dest='output', metavar='FILE', - help='Output file *.tsv [default: sub. 
extension]', required=False, - type=lambda s: cliutil.extn_check(parser, ('tsv'), s)) -args = parser.parse_args() - -normalise(args) diff --git a/bin/validate_manifest.py b/bin/validate_manifest.py deleted file mode 100755 index a753d58..0000000 --- a/bin/validate_manifest.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 - -"""Validates the normalised TSV manifest files""" - -# python builtin -import sys -import argparse -import pkg_resources # part of setuptools - -# this project -from cgp_seq_input_val import cliutil -from cgp_seq_input_val.manifest import Manifest -from cgp_seq_input_val.manifest import ValidationError - -version = pkg_resources.require("cgp_seq_input_val")[0].version - -# read variables, auto help text -parser = argparse.ArgumentParser(description='Validate a tsv import manifest file') -parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) -parser.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in tsv formats', required=True, - type=lambda s: cliutil.extn_check(parser, ('tsv'), s, readable=True)) -parser.add_argument('-o', '--output', dest='output', metavar='DIR', - help='Output manifest to this area, two files (tsv, json)', required=True) -parser.add_argument('-c', '--checkfiles', dest='checkfiles', action='store_true', - help='When present check file exist and are non-zero size') - -args = parser.parse_args() - -try: - manifest = Manifest(args.input) - manifest.validate() - # output new manifest in tsv and json. - (tsv_file, json_file) = manifest.write(args.output) - print("Created files:\n\t%s\n\t%s" % (tsv_file, json_file)) -except ValidationError as ve: - print("ERROR: " + str(ve), file=sys.stderr) - exit(1) diff --git a/bin/validate_seq_file.py b/bin/validate_seq_file.py deleted file mode 100755 index 65924b5..0000000 --- a/bin/validate_seq_file.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 - -""" -Validates up to 2 sequencing data files -When 2 found these should be paired fastq[.gz], -otherwise expecting interleaved fastq. - -May be extended to cover BAM/CRAM at a later date. 
-"""
-
-# python builtin
-import sys
-import argparse
-import pkg_resources  # part of setuptools
-
-# this project
-from cgp_seq_input_val.seq_validator import SeqValidator
-from cgp_seq_input_val.seq_validator import SeqValidationError
-
-version = pkg_resources.require("cgp_seq_input_val")[0].version
-
-parser = argparse.ArgumentParser(description="""Validates up to 2 sequencing data files.""")
-parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
-parser.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), default='-',
-                    help='Output json report', required=False)
-parser.add_argument('-i', '--input', dest='input', metavar='FILE', nargs='+',
-                    help='Input manifest in tsv formats', required=True)
-
-args = parser.parse_args()
-
-try:
-    file_2 = None
-    if len(args.input) == 2:
-        file_2 = args.input[1]
-    validator = SeqValidator(args.input[0], file_2)
-    validator.validate()
-    validator.report(args.report)
-except SeqValidationError as ve: # runtime so no functions for message and errno
-    print("ERROR: " + str(ve), file=sys.stderr)
-    exit(1)
-# have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError
-except (OSError, IOError) as err:
-    print("ERROR: %s - %s" % (err.strerror, err.filename), file=sys.stderr)
-    exit(err.errno)
-
-# Interleaved fastq to paired:
-# gnu-sed needed
-# zcat 242215_i.fq.gz | gsed -n '1~8,+3p' | gzip -c > 242215_1.fq.gz
-# zcat 242215_i.fq.gz | gsed -n '5~8,+3p' | gzip -c > 242215_2.fq.gz
diff --git a/cgp_seq_input_val/cliutil.py b/cgp_seq_input_val/cliutil.py
index 3b94747..25c821c 100644
--- a/cgp_seq_input_val/cliutil.py
+++ b/cgp_seq_input_val/cliutil.py
@@ -13,10 +13,10 @@ def extn_check(parser, choices, fname, readable=False):
     try:
         handle = open(fname, 'r')
         handle.close()
-    except FileNotFoundError as error: # pragma: no cover
+    except FileNotFoundError as error: # can't cover these easily
        parser.error(error)
 
-    if extn not in choices: # pragma: no cover
+    if extn not in choices: # can't cover these easily
         parser.error("File doesn't end with {}".format(choices))
 
     return fname
diff --git a/cgp_seq_input_val/command_line.py b/cgp_seq_input_val/command_line.py
new file mode 100644
index 0000000..7cd6d2d
--- /dev/null
+++ b/cgp_seq_input_val/command_line.py
@@ -0,0 +1,60 @@
+"""
+Handle the command line parsing and select the correct sub process.
+"""
+
+import argparse
+import sys
+import pkg_resources  # part of setuptools
+
+from cgp_seq_input_val import constants, cliutil
+from cgp_seq_input_val.manifest import normalise
+from cgp_seq_input_val.manifest import wrapped_validate
+from cgp_seq_input_val.seq_validator import validate_seq_files
+version = pkg_resources.require("cgp_seq_input_val")[0].version
+
+def main():
+    """
+    Sets up the parser and handles triggereing of correct sub-command
+    """
+    parser = argparse.ArgumentParser(prog='cgpSeqInputVal')
+    subparsers = parser.add_subparsers(help='sub-command help')
+
+    # create the parser for the "man-norm" command
+    parser_a = subparsers.add_parser('man-norm', description='Convert manifest files to common denominator (tsv)',
+                                     epilog='Input can be [xls|xlsx|csv|tsv]. \
+                                             "tsv" is just copied to maintain tool-chain')
+    parser_a.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
+    parser_a.add_argument('-i', '--input', dest='input', metavar='FILE',
+                          help='Input manifest in friendly formats', required=True,
+                          type=lambda s: cliutil.extn_check(parser, constants.MANIFEST_EXTNS, s, readable=True))
+    parser_a.add_argument('-o', '--output', dest='output', metavar='FILE',
+                          help='Output file *.tsv [default: sub. extension]', required=False,
+                          type=lambda s: cliutil.extn_check(parser, ('tsv'), s))
+    parser_a.set_defaults(func=normalise)
+
+    # create the parser for the "man-valid" command
+    parser_b = subparsers.add_parser('man-valid', description='Validate a tsv import manifest file')
+    parser_b.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
+    parser_b.add_argument('-i', '--input', dest='input', metavar='FILE',
+                          help='Input manifest in tsv formats', required=True,
+                          type=lambda s: cliutil.extn_check(parser, ('tsv'), s, readable=True))
+    parser_b.add_argument('-o', '--output', dest='output', metavar='DIR',
+                          help='Output manifest to this area, two files (tsv, json)', required=True)
+    parser_b.add_argument('-c', '--checkfiles', dest='checkfiles', action='store_true',
+                          help='When present check file exist and are non-zero size')
+    parser_b.set_defaults(func=wrapped_validate)
+
+    # create the parser for the "seq-valid" command
+    parser_c = subparsers.add_parser('seq-valid', description='Validates up to 2 sequencing data files.')
+    parser_c.add_argument('-v', '--version', action='version', version='%(prog)s ' + version)
+    parser_c.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), default='-',
+                          help='Output json report', required=False)
+    parser_c.add_argument('-i', '--input', dest='input', metavar='FILE', nargs='+',
+                          help='Input manifest in tsv formats', required=True)
+    parser_c.set_defaults(func=validate_seq_files)
+
+    args = parser.parse_args()
+    if len(sys.argv) > 1:
+        args.func(args)
+    else:
+        sys.exit('\nERROR Arguments required\n\tPlease run: cgpSeqInputVal --help\n')
diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py
index 263974f..809e0a1 100644
--- a/cgp_seq_input_val/manifest.py
+++ b/cgp_seq_input_val/manifest.py
@@ -20,6 +20,18 @@ VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either \
 is present, check body.validate."
 
+def wrapped_validate(args):
+    """
+    Top level entry point for validating a manifest
+    """
+    try:
+        manifest = Manifest(args.input)
+        manifest.validate()
+        # output new manifest in tsv and json.
+        (tsv_file, json_file) = manifest.write(args.output)
+        print("Created files:\n\t%s\n\t%s" % (tsv_file, json_file))
+    except ValidationError as ve:
+        sys.exit("ERROR: " + str(ve))
 
 def uuid4_chk(uuid_str):
     """Tests validity of uuid"""
diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py
index 9d0b62a..59ec1b2 100644
--- a/cgp_seq_input_val/seq_validator.py
+++ b/cgp_seq_input_val/seq_validator.py
@@ -16,6 +16,25 @@
 prog_records = 100000
 
+def validate_seq_files(args):
+    """
+    Top level entry point for validating sequence files.
+ """ + try: + file_2 = None + if len(args.input) == 2: + file_2 = args.input[1] + validator = SeqValidator(args.input[0], file_2) + validator.validate() + validator.report(args.report) + except SeqValidationError as ve: # runtime so no functions for message and errno + print("ERROR: " + str(ve), file=sys.stderr) + exit(1) + # have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError + except (OSError, IOError) as err: + print("ERROR: %s - %s" % (err.strerror, err.filename), file=sys.stderr) + exit(err.errno) + class SeqValidator(object): """ diff --git a/run_tests.sh b/run_tests.sh index 6f15bec..e9a90b5 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash set -e -env/bin/nosetests --with-coverage --cover-erase --cover-html --cover-min-percentage=50 --cover-package=cgp_seq_input_val +pytest --cov-report term --cov-report html --cov=cgp_seq_input_val --cov-fail-under=50 set +e # these should not die: @@ -8,16 +8,16 @@ set +e echo -e "\n##########################" echo "# Running pylint (style) #" echo "##########################" -env/bin/pylint --output-format=colorized bin/*.py cgp_seq_input_val +pylint --output-format=colorized cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (cyclomatic complexity) #" echo "#########################################" -env/bin/radon cc -nc bin cgp_seq_input_val +radon cc -nc cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (maintainability index) #" echo "#########################################" -env/bin/radon mi -s -n B bin cgp_seq_input_val +radon mi -s -n B cgp_seq_input_val exit 0 # don't die based on assements of code quality diff --git a/setup.py b/setup.py index fc2874b..9408ecc 100755 --- a/setup.py +++ b/setup.py @@ -11,12 +11,13 @@ 'author_email': 'cgphelp@sanger.ac.uk', 'version': '1.1.0', 'python_requires': '>= 3.3', - 'setup_requires': ['nose>=1.0'], + 'setup_requires': ['pytest'], 'install_requires': ['progressbar2', 'xlrd'], 'packages': ['cgp_seq_input_val'], 'package_data': {'cgp_seq_input_val': ['config/*.json']}, - 'scripts': ['bin/normalise_manifest.py', 'bin/validate_manifest.py', - 'bin/validate_seq_file.py'] + 'entry_points': { + 'console_scripts': ['cgpSeqInputVal=cgp_seq_input_val.command_line:main'], + } } setup(**config) diff --git a/tests/cgp_seq_input_val_tests_cliutil.py b/tests/cgp_seq_input_val_tests_cliutil.py deleted file mode 100644 index d317eb4..0000000 --- a/tests/cgp_seq_input_val_tests_cliutil.py +++ /dev/null @@ -1,22 +0,0 @@ -from nose.tools import * -import os, sys, tempfile -import glob -#from argparse import Namespace - -from cgp_seq_input_val.cliutil import extn_check -from cgp_seq_input_val import constants - -import argparse - -test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cliutil') - -def setup(): - pass - -def teardown(): - pass - -def test_extn_check_good(): - parser = argparse.ArgumentParser() - for f in glob.glob(os.path.join(test_dir, 'good.*')): - extn_check(parser, constants.MANIFEST_EXTNS, f, readable=True) diff --git a/tests/cgp_seq_input_val_tests_fastq_read.py b/tests/cgp_seq_input_val_tests_fastq_read.py deleted file mode 100644 index 2ce3802..0000000 --- a/tests/cgp_seq_input_val_tests_fastq_read.py +++ /dev/null @@ -1,47 +0,0 @@ -from nose.tools import * -import os, sys, tempfile - -from cgp_seq_input_val.fastq_read import FastqRead -from cgp_seq_input_val.error_classes import SeqValidationError - -test_dir = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') - -def setup(): - pass - -def teardown(): - pass - -@raises(SeqValidationError) -def test_fastq_header_no_at(): - fqi = os.path.join(test_dir, 'bad_header_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -@raises(SeqValidationError) -def test_fastq_header_at_only(): - fqi = os.path.join(test_dir, 'bad_header_2.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -@raises(SeqValidationError) -def test_fastq_seq_shorter_than_qual(): - fqi = os.path.join(test_dir, 'seq-shorter_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -@raises(SeqValidationError) -def test_fastq_qual_shorter_than_seq(): - fqi = os.path.join(test_dir, 'qual-shorter_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - fr.validate('x') - -def test_fastq_string_print(): - fqi = os.path.join(test_dir, 'good_read_1.fq') - with open(fqi, 'r') as fp: - fr = FastqRead(fp, 0, None) - t = str(fr) diff --git a/tests/cgp_seq_input_val_tests_manifest.py b/tests/cgp_seq_input_val_tests_manifest.py deleted file mode 100644 index 5473a49..0000000 --- a/tests/cgp_seq_input_val_tests_manifest.py +++ /dev/null @@ -1,228 +0,0 @@ -from nose.tools import * -import sys, os, tempfile, shutil, json -from cgp_seq_input_val.manifest import Manifest, Header, Body, ConfigError, ParsingError, ValidationError -from argparse import Namespace - -data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'data') -test_data = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') -configs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'configs') - -def setup_args(indir, intype, tmpd): - ''' - Simplifies tests by generating standardised in/out for specified filetypes - ''' - return Namespace(input=os.path.join(indir, 'SimplifiedManifest_v1.0.%s' % (intype)), - output=os.path.join(tmpd, '%s_to.tsv' % (intype)) ) - -def setup(): - print("SETUP!") - -def teardown(): - print("TEAR DOWN!") - -### Manifest tests - -@raises(ValueError) -def test_manifest_bad_filetype(): - infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.xls') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_missing_required(): - infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.tsv') - manifest = Manifest(infile) - manifest.validate() - -def test_manifest_write(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) - manifest.validate() - (tsv_file, json_file) = manifest.write(tmpd) # output new manifest in tsv and json. 
- -def test_manifest_uuid(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) - manifest.validate() - assert manifest.get_uuid() - -def test_manifest_existing_uuid(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'with_uuid.tsv')) - manifest.validate() - assert manifest.get_uuid() == '05218fd0-79e5-4214-92d5-e133cd16a798' - -@raises(ValidationError) -def test_manifest_existing_bad_uuid(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'with_bad_uuid.tsv')) - manifest.validate() - -@raises(ValidationError) -def test_manifest_uuid_novalidate(): - with tempfile.TemporaryDirectory() as tmpd: - manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) - assert manifest.get_uuid() - -### Config parsing tests - -@raises(ParsingError) -def test_manifest_get_config_bad_type(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'bad_type', 'IMPORT-1.0.json')) - -@raises(ParsingError) -def test_manifest_get_config_bad_version(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'bad_version', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_body(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_body', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_expected(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_expected', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_header(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_header', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_required(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_required', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_no_validate(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'good_manifest.tsv')) - header.get_config(os.path.join(configs, 'no_validate', 'IMPORT-1.0.json')) - -@raises(ConfigError) -def test_manifest_json_limit_no_limit_by(): - # need a good file to setup and then test get_config with a bad config file - infile = os.path.join(test_data, - 'file_set_good', - 'files_good.tsv') - header = Header(infile) - cfg = header.get_config(os.path.join(configs, 'limit_no_limit_by', 'IMPORT-1.0.json')) - body = Body(infile, cfg['body']) - body.validate(cfg['body']) - -@raises(ConfigError) -def test_manifest_json_limit_by_no_limit(): - # need a good file to setup and then test get_config with a bad config file - infile = os.path.join(test_data, - 'file_set_good', - 'files_good.tsv') - header = Header(infile) - cfg = 
header.get_config(os.path.join(configs, 'limit_by_no_limit', 'IMPORT-1.0.json')) - body = Body(infile, cfg['body']) - body.validate(cfg['body']) - -### Header tests - -@raises(ValidationError) -def test_manifest_extra_header(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'extraHeader.tsv')) - config = header.get_config() - header.validate(config['header']) - -@raises(ValidationError) -def test_manifest_missing_header(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'missingHeader.tsv')) - config = header.get_config() - header.validate(config['header']) - -@raises(ValidationError) -def test_manifest_invalid_header_val(): - # need a good file to setup and then test get_config with a bad config file - header = Header(os.path.join(test_data, 'invalidHeaderVal.tsv')) - config = header.get_config() - header.validate(config['header']) - -### Body tests - -@raises(ValidationError) -def test_manifest_invalid_body_val(): - infile = os.path.join(test_data, 'invalidBodyVal.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_absent_body_val(): - infile = os.path.join(test_data, 'absentBodyVal.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_period_body_val(): - infile = os.path.join(test_data, 'periodBodyVal.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_dup_files_same_row(): - infile = os.path.join(test_data, 'dupFilesSameRow.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_dup_files_diff_row(): - infile = os.path.join(test_data, 'dupFilesDiffRow.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_body_head_order(): - infile = os.path.join(test_data, 'bodyHeadOrder.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_extn_file1(): - infile = os.path.join(test_data, 'invalidExtnFile1.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_extn_file2(): - infile = os.path.join(test_data, 'invalidExtnFile2.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_paired_extn_mismatch(): - infile = os.path.join(test_data, 'pairedExtnMismatch.tsv') - manifest = Manifest(infile) - manifest.validate() - -@raises(ValidationError) -def test_manifest_limit_exceeded(): - # need a good file to setup and then test get_config with a bad config file - infile = os.path.join(test_data, - 'file_set_good', - 'files_good.tsv') - header = Header(infile) - cfg = header.get_config(os.path.join(configs, 'limit_to_exceed', 'IMPORT-1.0.json')) - body = Body(infile, cfg['body']) - body.validate(cfg['body']) - -def test_manifest_file_set_good(): - infile = os.path.join(test_data, 'file_set_good', - 'files_good.tsv') - manifest = Manifest(infile) - manifest.validate(True) - as_json = json.dumps(manifest.for_json()) diff --git a/tests/cgp_seq_input_val_tests_seq_validator.py b/tests/cgp_seq_input_val_tests_seq_validator.py deleted file mode 100644 index 91ea5fe..0000000 --- a/tests/cgp_seq_input_val_tests_seq_validator.py +++ /dev/null @@ -1,83 +0,0 @@ -from nose.tools import * -import os, sys, tempfile - -from cgp_seq_input_val.seq_validator import SeqValidator -from 
cgp_seq_input_val.error_classes import SeqValidationError - -test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') - -def setup(): - pass - -def teardown(): - pass - -def test_seq_val_i_read_good(): - fqi = os.path.join(test_dir, 'good_read_i.fq') - sv = SeqValidator(fqi, None, progress_pairs=1) - sv.validate() - -def test_seq_val_p_read_good(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'good_read_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=1) - sv.validate() - -def test_seq_val_i_gz_read_good(): - fqi = os.path.join(test_dir, 'good_read_i.fq.gz') - sv = SeqValidator(fqi, None, progress_pairs=0) - sv.validate() - t = str(sv) - sv.report(sys.stdout) - -def test_seq_val_p_gz_read_good(): - fq1 = os.path.join(test_dir, 'good_read_1.fq.gz') - fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_bad_file(): - fqi = os.path.join(test_dir, 'good_read_i.BAD') - sv = SeqValidator(fqi, None, progress_pairs=0) - -@raises(SeqValidationError) -def test_seq_val_mismatch_ext(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - -@raises(SeqValidationError) -def test_seq_val_more_read2(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, '2_reads_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_more_read1(): - fq1 = os.path.join(test_dir, '2_reads_1.fq') - fq2 = os.path.join(test_dir, 'good_read_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_r1_in_2(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'r1_reads_in_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_r2_in_1(): - fq1 = os.path.join(test_dir, 'good_read_2.fq') - fq2 = os.path.join(test_dir, 'r2_reads_in_1.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() - -@raises(SeqValidationError) -def test_seq_val_fq_name(): - fq1 = os.path.join(test_dir, 'good_read_1.fq') - fq2 = os.path.join(test_dir, 'diff_2.fq') - sv = SeqValidator(fq1, fq2, progress_pairs=0) - sv.validate() diff --git a/tests/data/cliutil/bad.extn b/tests/data/cliutil/bad.extn new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cgp_seq_input_val_cliutil.py b/tests/test_cgp_seq_input_val_cliutil.py new file mode 100644 index 0000000..38cf326 --- /dev/null +++ b/tests/test_cgp_seq_input_val_cliutil.py @@ -0,0 +1,32 @@ +import pytest +import os, sys, tempfile +import glob +#from argparse import Namespace + +from cgp_seq_input_val.cliutil import extn_check +from cgp_seq_input_val import constants + +import argparse + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cliutil') + +def setup(): + pass + +def teardown(): + pass + +def test_extn_check_good_extn(): + parser = argparse.ArgumentParser() + for f in glob.glob(os.path.join(test_dir, 'good.*')): + extn_check(parser, constants.MANIFEST_EXTNS, f, readable=True) + +def test_extn_check_f_not_f(): + with pytest.raises(SystemExit) as e_info: + parser = argparse.ArgumentParser() + extn_check(parser, constants.MANIFEST_EXTNS, '/I_wont_exist_cgp_seq_input_val', readable=True) + +def test_extn_check_bad_extn(): + with 
pytest.raises(SystemExit) as e_info: + parser = argparse.ArgumentParser() + extn_check(parser, constants.MANIFEST_EXTNS, os.path.join(test_dir, 'bad.extn'), readable=True) diff --git a/tests/test_cgp_seq_input_val_fastq_read.py b/tests/test_cgp_seq_input_val_fastq_read.py new file mode 100644 index 0000000..4625720 --- /dev/null +++ b/tests/test_cgp_seq_input_val_fastq_read.py @@ -0,0 +1,51 @@ +import pytest +import os, sys, tempfile + +from cgp_seq_input_val.fastq_read import FastqRead +from cgp_seq_input_val.error_classes import SeqValidationError + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') + +def setup(): + pass + +def teardown(): + pass + +def test_fastq_header_no_at(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'bad_header_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_header_at_only(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'bad_header_2.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_seq_shorter_than_qual(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'seq-shorter_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_qual_shorter_than_seq(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'qual-shorter_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + fr.validate('x') + + +def test_fastq_string_print(): + fqi = os.path.join(test_dir, 'good_read_1.fq') + with open(fqi, 'r') as fp: + fr = FastqRead(fp, 0, None) + t = str(fr) diff --git a/tests/cgp_seq_input_val_tests_file_meta.py b/tests/test_cgp_seq_input_val_file_meta.py similarity index 55% rename from tests/cgp_seq_input_val_tests_file_meta.py rename to tests/test_cgp_seq_input_val_file_meta.py index f9f9ac5..b44a55b 100644 --- a/tests/cgp_seq_input_val_tests_file_meta.py +++ b/tests/test_cgp_seq_input_val_file_meta.py @@ -1,4 +1,4 @@ -from nose.tools import * +import pytest from cgp_seq_input_val.file_meta import FileValidationError, FileMeta import os, sys, tempfile @@ -10,22 +10,22 @@ def teardown(): ### FileMeta tests -@raises(FileValidationError) def test_file_meta_file_absent(): - headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] - details = ["1", "Start", "Y", "Y", "1", "bello.bam"] - fm = FileMeta(headers, details, '/') - fm.test_files(1) + with pytest.raises(FileValidationError) as e_info: + headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] + details = ["1", "Start", "Y", "Y", "1", "bello.bam"] + fm = FileMeta(headers, details, '/') + fm.test_files(1) -@raises(FileValidationError) def test_file_meta_file_empty(): - headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] - details = ["1", "Start", "Y", "Y", "1", "bello.bam"] - with tempfile.TemporaryDirectory() as tmpd: - with open(os.path.join(tmpd, 'bello.bam'), 'w'): - pass - fm = FileMeta(headers, details, tmpd) - fm.test_files(1) + with pytest.raises(FileValidationError) as e_info: + headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] + details = ["1", "Start", "Y", "Y", "1", "bello.bam"] + with tempfile.TemporaryDirectory() as tmpd: + with open(os.path.join(tmpd, 'bello.bam'), 'w'): + pass + fm = 
FileMeta(headers, details, tmpd) + fm.test_files(1) def test_file_meta_get_path(): headers = ["Group_ID", "Sample", "Normal_Tissue", "Group_Control", "Library", "File", "File_2"] diff --git a/tests/test_cgp_seq_input_val_manifest.py b/tests/test_cgp_seq_input_val_manifest.py new file mode 100644 index 0000000..6b624e0 --- /dev/null +++ b/tests/test_cgp_seq_input_val_manifest.py @@ -0,0 +1,228 @@ +import pytest +import sys, os, tempfile, shutil, json +from cgp_seq_input_val.manifest import Manifest, Header, Body, ConfigError, ParsingError, ValidationError +from argparse import Namespace + +data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'data') +test_data = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') +configs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'configs') + +def setup_args(indir, intype, tmpd): + ''' + Simplifies tests by generating standardised in/out for specified filetypes + ''' + return Namespace(input=os.path.join(indir, 'SimplifiedManifest_v1.0.%s' % (intype)), + output=os.path.join(tmpd, '%s_to.tsv' % (intype)) ) + +def setup(): + print("SETUP!") + +def teardown(): + print("TEAR DOWN!") + +### Manifest tests + +def test_manifest_bad_filetype(): + with pytest.raises(ValueError) as e_info: + infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.xls') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_missing_required(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(data_dir, 'SimplifiedManifest_v1.0.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_write(): + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) + manifest.validate() + (tsv_file, json_file) = manifest.write(tmpd) # output new manifest in tsv and json. 
+ +def test_manifest_uuid(): + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) + manifest.validate() + assert manifest.get_uuid() + +def test_manifest_existing_uuid(): + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'with_uuid.tsv')) + manifest.validate() + assert manifest.get_uuid() == '05218fd0-79e5-4214-92d5-e133cd16a798' + +def test_manifest_existing_bad_uuid(): + with pytest.raises(ValidationError) as e_info: + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'with_bad_uuid.tsv')) + manifest.validate() + +def test_manifest_uuid_novalidate(): + with pytest.raises(ValidationError) as e_info: + with tempfile.TemporaryDirectory() as tmpd: + manifest = Manifest(os.path.join(test_data, 'file_set_good', 'files_good.tsv')) + assert manifest.get_uuid() + +### Config parsing tests + +def test_manifest_get_config_bad_type(): + with pytest.raises(ParsingError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'bad_type', 'IMPORT-1.0.json')) + +def test_manifest_get_config_bad_version(): + with pytest.raises(ParsingError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'bad_version', 'IMPORT-1.0.json')) + +def test_manifest_json_no_body(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_body', 'IMPORT-1.0.json')) + +def test_manifest_json_no_expected(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_expected', 'IMPORT-1.0.json')) + +def test_manifest_json_no_header(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_header', 'IMPORT-1.0.json')) + +def test_manifest_json_no_required(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_required', 'IMPORT-1.0.json')) + +def test_manifest_json_no_validate(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'good_manifest.tsv')) + header.get_config(os.path.join(configs, 'no_validate', 'IMPORT-1.0.json')) + +def test_manifest_json_limit_no_limit_by(): + with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + infile = os.path.join(test_data, + 'file_set_good', + 'files_good.tsv') + header = Header(infile) + cfg = header.get_config(os.path.join(configs, 'limit_no_limit_by', 'IMPORT-1.0.json')) + body = Body(infile, cfg['body']) + body.validate(cfg['body']) + +def test_manifest_json_limit_by_no_limit(): + 
with pytest.raises(ConfigError) as e_info: + # need a good file to setup and then test get_config with a bad config file + infile = os.path.join(test_data, + 'file_set_good', + 'files_good.tsv') + header = Header(infile) + cfg = header.get_config(os.path.join(configs, 'limit_by_no_limit', 'IMPORT-1.0.json')) + body = Body(infile, cfg['body']) + body.validate(cfg['body']) + +### Header tests + +def test_manifest_extra_header(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'extraHeader.tsv')) + config = header.get_config() + header.validate(config['header']) + +def test_manifest_missing_header(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'missingHeader.tsv')) + config = header.get_config() + header.validate(config['header']) + +def test_manifest_invalid_header_val(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + header = Header(os.path.join(test_data, 'invalidHeaderVal.tsv')) + config = header.get_config() + header.validate(config['header']) + +### Body tests + +def test_manifest_invalid_body_val(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'invalidBodyVal.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_absent_body_val(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'absentBodyVal.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_period_body_val(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'periodBodyVal.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_dup_files_same_row(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'dupFilesSameRow.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_dup_files_diff_row(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'dupFilesDiffRow.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_body_head_order(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'bodyHeadOrder.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_extn_file1(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'invalidExtnFile1.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_extn_file2(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'invalidExtnFile2.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_paired_extn_mismatch(): + with pytest.raises(ValidationError) as e_info: + infile = os.path.join(test_data, 'pairedExtnMismatch.tsv') + manifest = Manifest(infile) + manifest.validate() + +def test_manifest_limit_exceeded(): + with pytest.raises(ValidationError) as e_info: + # need a good file to setup and then test get_config with a bad config file + infile = os.path.join(test_data, + 'file_set_good', + 'files_good.tsv') + header = Header(infile) + cfg = header.get_config(os.path.join(configs, 'limit_to_exceed', 'IMPORT-1.0.json')) + body = Body(infile, cfg['body']) + body.validate(cfg['body']) + +def test_manifest_file_set_good(): + 
infile = os.path.join(test_data, 'file_set_good', + 'files_good.tsv') + manifest = Manifest(infile) + manifest.validate(True) + as_json = json.dumps(manifest.for_json()) diff --git a/tests/cgp_seq_input_val_tests_normalise.py b/tests/test_cgp_seq_input_val_normalise.py similarity index 96% rename from tests/cgp_seq_input_val_tests_normalise.py rename to tests/test_cgp_seq_input_val_normalise.py index b0e8ba0..33a49e4 100644 --- a/tests/cgp_seq_input_val_tests_normalise.py +++ b/tests/test_cgp_seq_input_val_normalise.py @@ -1,4 +1,3 @@ -from nose.tools import * import sys, os, tempfile, shutil from cgp_seq_input_val.manifest import normalise from argparse import Namespace @@ -13,10 +12,10 @@ def setup_args(indir, intype, tmpd): output=os.path.join(tmpd, '%s_to.tsv' % (intype)) ) def setup(): - print("SETUP!") + pass def teardown(): - print("TEAR DOWN!") + pass def test_normalise_xls(): with tempfile.TemporaryDirectory() as tmpd: diff --git a/tests/test_cgp_seq_input_val_seq_validator.py b/tests/test_cgp_seq_input_val_seq_validator.py new file mode 100644 index 0000000..9f18b56 --- /dev/null +++ b/tests/test_cgp_seq_input_val_seq_validator.py @@ -0,0 +1,83 @@ +import pytest +import os, sys, tempfile + +from cgp_seq_input_val.seq_validator import SeqValidator +from cgp_seq_input_val.error_classes import SeqValidationError + +test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'fastq_read') + +def setup(): + pass + +def teardown(): + pass + +def test_seq_val_i_read_good(): + fqi = os.path.join(test_dir, 'good_read_i.fq') + sv = SeqValidator(fqi, None, progress_pairs=1) + sv.validate() + +def test_seq_val_p_read_good(): + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'good_read_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=1) + sv.validate() + +def test_seq_val_i_gz_read_good(): + fqi = os.path.join(test_dir, 'good_read_i.fq.gz') + sv = SeqValidator(fqi, None, progress_pairs=0) + sv.validate() + t = str(sv) + sv.report(sys.stdout) + +def test_seq_val_p_gz_read_good(): + fq1 = os.path.join(test_dir, 'good_read_1.fq.gz') + fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_bad_file(): + with pytest.raises(SeqValidationError) as e_info: + fqi = os.path.join(test_dir, 'good_read_i.BAD') + sv = SeqValidator(fqi, None, progress_pairs=0) + +def test_seq_val_mismatch_ext(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'good_read_2.fq.gz') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + +def test_seq_val_more_read2(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, '2_reads_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_more_read1(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, '2_reads_1.fq') + fq2 = os.path.join(test_dir, 'good_read_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_r1_in_2(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'r1_reads_in_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_r2_in_1(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_2.fq') + fq2 = os.path.join(test_dir, 
'r2_reads_in_1.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() + +def test_seq_val_fq_name(): + with pytest.raises(SeqValidationError) as e_info: + fq1 = os.path.join(test_dir, 'good_read_1.fq') + fq2 = os.path.join(test_dir, 'diff_2.fq') + sv = SeqValidator(fq1, fq2, progress_pairs=0) + sv.validate() From b39eb1106605221dc56ade888c4eb8a7d76bd0be Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 11:24:29 +0100 Subject: [PATCH 24/37] remove use of exit --- cgp_seq_input_val/seq_validator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py index 59ec1b2..f7d8834 100644 --- a/cgp_seq_input_val/seq_validator.py +++ b/cgp_seq_input_val/seq_validator.py @@ -28,12 +28,10 @@ def validate_seq_files(args): validator.validate() validator.report(args.report) except SeqValidationError as ve: # runtime so no functions for message and errno - print("ERROR: " + str(ve), file=sys.stderr) - exit(1) + sys.exit("ERROR: " + str(ve)) # have to catch 2 classes works 3.0-3.3, above 3.3 all IO issues are captured under OSError except (OSError, IOError) as err: - print("ERROR: %s - %s" % (err.strerror, err.filename), file=sys.stderr) - exit(err.errno) + sys.exit("ERROR (%d): %s - %s" % (err.errno, err.strerror, err.filename)) class SeqValidator(object): From b6d07c4658605f952dc1c4fc315eede3ccd18a6b Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 12:11:38 +0100 Subject: [PATCH 25/37] Switch from pylint to pep8 for better alignment with python style guide and code-climate --- README.md | 2 +- cgp_seq_input_val/command_line.py | 85 ++++-- cgp_seq_input_val/manifest.py | 46 ++-- cgp_seq_input_val/seq_validator.py | 10 +- pylintrc | 425 ----------------------------- run_tests.sh | 6 +- tox.ini | 4 +- 7 files changed, 104 insertions(+), 474 deletions(-) delete mode 100644 pylintrc diff --git a/README.md b/README.md index d3d1257..a305185 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ For testing/coverage (`./run_tests.sh`) source env/bin/activate # if not already in env pip install pytest pip install pytest-cov -pip install pylint +pip install pep8 pip install radon ``` diff --git a/cgp_seq_input_val/command_line.py b/cgp_seq_input_val/command_line.py index 7cd6d2d..38a3a07 100644 --- a/cgp_seq_input_val/command_line.py +++ b/cgp_seq_input_val/command_line.py @@ -12,6 +12,7 @@ from cgp_seq_input_val.seq_validator import validate_seq_files version = pkg_resources.require("cgp_seq_input_val")[0].version + def main(): """ Sets up the parser and handles triggereing of correct sub-command @@ -20,37 +21,75 @@ def main(): subparsers = parser.add_subparsers(help='sub-command help') # create the parser for the "man-norm" command - parser_a = subparsers.add_parser('man-norm', description='Convert manifest files to common denominator (tsv)', - epilog='Input can be [xls|xlsx|csv|tsv]. \ - "tsv" is just copied to maintain tool-chain') - parser_a.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) - parser_a.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in friendly formats', required=True, - type=lambda s: cliutil.extn_check(parser, constants.MANIFEST_EXTNS, s, readable=True)) - parser_a.add_argument('-o', '--output', dest='output', metavar='FILE', - help='Output file *.tsv [default: sub. 
extension]', required=False, + parser_a = subparsers.\ + add_parser('man-norm', + description='Convert manifest files to common denominator (tsv)', + epilog='Input can be [xls|xlsx|csv|tsv]. \ + "tsv" is just copied to maintain tool-chain') + parser_a.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + parser_a.add_argument('-i', '--input', + dest='input', + metavar='FILE', + help='Input manifest in friendly formats', + required=True, + type=lambda s: cliutil.extn_check(parser, + constants.MANIFEST_EXTNS, + s, + readable=True)) + parser_a.add_argument('-o', '--output', + dest='output', + metavar='FILE', + help='Output file *.tsv [default: sub. extension]', + required=False, type=lambda s: cliutil.extn_check(parser, ('tsv'), s)) parser_a.set_defaults(func=normalise) # create the parser for the "man-valid" command - parser_b = subparsers.add_parser('man-valid', description='Validate a tsv import manifest file') - parser_b.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) - parser_b.add_argument('-i', '--input', dest='input', metavar='FILE', - help='Input manifest in tsv formats', required=True, - type=lambda s: cliutil.extn_check(parser, ('tsv'), s, readable=True)) - parser_b.add_argument('-o', '--output', dest='output', metavar='DIR', - help='Output manifest to this area, two files (tsv, json)', required=True) - parser_b.add_argument('-c', '--checkfiles', dest='checkfiles', action='store_true', + parser_b = subparsers.add_parser('man-valid', + description='Validate a tsv import manifest file') + parser_b.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + parser_b.add_argument('-i', '--input', + dest='input', + metavar='FILE', + help='Input manifest in tsv formats', + required=True, + type=lambda s: cliutil.extn_check(parser, + ('tsv'), + s, + readable=True)) + parser_b.add_argument('-o', '--output', + dest='output', + metavar='DIR', + help='Output manifest to this area, two files (tsv, json)', + required=True) + parser_b.add_argument('-c', '--checkfiles', + dest='checkfiles', + action='store_true', help='When present check file exist and are non-zero size') parser_b.set_defaults(func=wrapped_validate) # create the parser for the "seq-valid" command - parser_c = subparsers.add_parser('seq-valid', description='Validates up to 2 sequencing data files.') - parser_c.add_argument('-v', '--version', action='version', version='%(prog)s ' + version) - parser_c.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), default='-', - help='Output json report', required=False) - parser_c.add_argument('-i', '--input', dest='input', metavar='FILE', nargs='+', - help='Input manifest in tsv formats', required=True) + parser_c = subparsers.add_parser('seq-valid', + description='Validates up to 2 sequencing data files.') + parser_c.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + parser_c.add_argument('-r', '--report', + dest='report', + type=argparse.FileType('w'), + default='-', + help='Output json report', + required=False) + parser_c.add_argument('-i', '--input', + dest='input', + metavar='FILE', + nargs='+', + help='Input manifest in tsv formats', + required=True) parser_c.set_defaults(func=validate_seq_files) args = parser.parse_args() diff --git a/cgp_seq_input_val/manifest.py b/cgp_seq_input_val/manifest.py index 809e0a1..0b17e51 100644 --- a/cgp_seq_input_val/manifest.py +++ b/cgp_seq_input_val/manifest.py @@ -12,7 +12,9 @@ from pkg_resources 
import resource_string, resource_filename
 
 from cgp_seq_input_val import constants
-from cgp_seq_input_val.error_classes import ConfigError, ParsingError, ValidationError
+from cgp_seq_input_val.error_classes import (ConfigError,
+                                             ParsingError,
+                                             ValidationError)
 from cgp_seq_input_val.file_meta import FileMeta
 
 VAL_LIM_ERROR = "Only %d sample(s) with a value of '%s' is allowed in column \
@@ -20,6 +22,7 @@ VAL_LIM_CONFIG_ERROR = "'limit' and 'limit_by' must both be defined when either \
 is present, check body.validate."
 
+
 def wrapped_validate(args):
     """
     Top level entry point for validating a manifest
@@ -33,6 +36,7 @@
     except ValidationError as ve:
         sys.exit("ERROR: " + str(ve))
 
+
 def uuid4_chk(uuid_str):
     """Tests validity of uuid"""
     try:
@@ -440,22 +444,30 @@ def field_values_valid(self, validate):
                % (field, fd.attributes[field], cnt))
             # Construct value occurence limiting counts
             for val_limit in chk:
-                if 'limit' in val_limit or 'limit_by' in val_limit:
-                    if 'limit' not in val_limit or 'limit_by' not in val_limit:
-                        raise ConfigError(VAL_LIM_CONFIG_ERROR+field)
-
-                    if fd.attributes[field] != val_limit['value']:
-                        continue
-
-                    lim_chk_lookup = field + '_' + val_limit['value']
-                    limit_by_value = fd.attributes[val_limit['limit_by']]
-                    if lim_chk_lookup not in limit_chks:
-                        limit_chks[lim_chk_lookup] = {}
-                    if limit_by_value not in limit_chks[lim_chk_lookup]:
-                        limit_chks[lim_chk_lookup][limit_by_value] = {}
-                    if fd.attributes['Sample'] not in limit_chks[lim_chk_lookup][limit_by_value]:
-                        limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] = 0
-                    limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1
+                if 'limit' not in val_limit and 'limit_by' not in val_limit:
+                    # skip if neither is present
+                    continue
+
+                if 'limit' not in val_limit or 'limit_by' not in val_limit:
+                    # must be found in both
+                    raise ConfigError(VAL_LIM_CONFIG_ERROR+field)
+
+                if fd.attributes[field] != val_limit['value']:
+                    continue
+
+                lim_chk_lookup = field + '_' + val_limit['value']
+                limit_by_value = fd.attributes[val_limit['limit_by']]
+
+                # handle keys we've not seen yet
+                if lim_chk_lookup not in limit_chks:
+                    limit_chks[lim_chk_lookup] = {}
+                if limit_by_value not in limit_chks[lim_chk_lookup]:
+                    limit_chks[lim_chk_lookup][limit_by_value] = {}
+
+                if fd.attributes['Sample'] not in limit_chks[lim_chk_lookup][limit_by_value]:
+                    limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] = 0
+                limit_chks[lim_chk_lookup][limit_by_value][fd.attributes['Sample']] += 1
+
         evaulate_value_limits(field, chk, limit_chks)
 
     def fields_have_values(self, rules):
diff --git a/cgp_seq_input_val/seq_validator.py b/cgp_seq_input_val/seq_validator.py
index f7d8834..04156f1 100644
--- a/cgp_seq_input_val/seq_validator.py
+++ b/cgp_seq_input_val/seq_validator.py
@@ -16,6 +16,7 @@
 prog_records = 100000
 
+
 def validate_seq_files(args):
     """
     Top level entry point for validating sequence files.
@@ -225,17 +226,20 @@ def check_pair(self, read_1, read_2): self.q_min = q_min if read_1.name != read_2.name: - raise SeqValidationError("Fastq record name at line %d should be a match to paired file line %s:\ + raise SeqValidationError("Fastq record name at line %d should be a \ + match to paired file line %s:\ \n\t%s (%s)\n\t%s (%s)" % (read_1.file_pos[0], read_2.file_pos[0], read_1.name, self.file_a, read_2.name, self.file_b)) if read_1.end != '1': - raise SeqValidationError("Fastq record at line %d of %s should be for first in pair, got '%s'" + raise SeqValidationError("Fastq record at line %d of %s should be \ + for first in pair, got '%s'" % (read_1.file_pos[0], self.file_a, read_1.end)) if read_2.end != '2': - raise SeqValidationError("Fastq record at line %d of %s should be for second in pair, got '%s'" + raise SeqValidationError("Fastq record at line %d of %s should be \ + for second in pair, got '%s'" % (read_2.file_pos[0], self.file_b, read_2.end)) def setup_progress(self): diff --git a/pylintrc b/pylintrc deleted file mode 100644 index 23d1be8..0000000 --- a/pylintrc +++ /dev/null @@ -1,425 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist= - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. -jobs=1 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". 
If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,long-suffix,old-ne-operator,old-octal-literal,import-star-module-level,raw-checker-failed,bad-inline-option,locally-disabled,locally-enabled,file-ignored,suppressed-message,useless-suppression,deprecated-pragma,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,eq-without-hash,div-method,idiv-method,rdiv-method,exception-message-attribute,invalid-str-codec,sys-max-int,bad-python3-import,deprecated-string-function,deprecated-str-translate-call,invalid-name,too-few-public-methods,blacklisted-name - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable= - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio).You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages -reports=no - -# Activate the evaluation score. 
-score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[BASIC] - -# Naming hint for argument names -argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct argument names -argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Naming hint for attribute names -attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct attribute names -attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Naming hint for class attribute names -class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Naming hint for class names -class-name-hint=[A-Z_][a-zA-Z0-9]+$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - -# Naming hint for constant names -const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming hint for function names -function-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct function names -function-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_ - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# Naming hint for inline iteration names -inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Naming hint for method names -method-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct method names -method-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Naming hint for module names -module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -property-classes=abc.abstractproperty - -# Naming hint for variable names -variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct variable names -variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. 
-max-line-length=120 - -# Maximum number of lines in a module -max-module-lines=1000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. 
-missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - -# Maximum number of branch for function / method body -max-branches=12 - -# Maximum number of locals for function / method body -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body -max-returns=6 - -# Maximum number of statements in function / method body -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of every (i.e. 
internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception diff --git a/run_tests.sh b/run_tests.sh index e9a90b5..1b8caca 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -6,9 +6,9 @@ set +e # these should not die: echo -e "\n##########################" -echo "# Running pylint (style) #" +echo "# Running pep8 (style) #" echo "##########################" -pylint --output-format=colorized cgp_seq_input_val +pep8 --format=pylint cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (cyclomatic complexity) #" @@ -18,6 +18,6 @@ radon cc -nc cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (maintainability index) #" echo "#########################################" -radon mi -s -n B cgp_seq_input_val +radon mi -s cgp_seq_input_val exit 0 # don't die based on assements of code quality diff --git a/tox.ini b/tox.ini index d8b4df9..48185ec 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [pep8] -max-line-length = 160 +max-line-length = 99 [pycodestyle] -max-line-length = 160 +max-line-length = 99 From a210c93b767d04a2c317a09624f1b7e5aa2ba76e Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 13:59:06 +0100 Subject: [PATCH 26/37] Fix install docs --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a305185..f31f3ca 100644 --- a/README.md +++ b/README.md @@ -66,16 +66,15 @@ though. ## INSTALL -Installation is via `easy_install`. Simply execute with the path to the compiled -'egg': +Installation is via `pip`. 
Simply execute with the path to the packaged distribution: ```bash -easy_install bundles/cgp_seq_input_val-0.1.0-py3.6.egg +pip install --find-links=~/wheels cgp_seq_input_val ``` ### Package Dependancies -`easy_install` will install the relevant dependancies, listed here for convenience: +`pip` will install the relevant dependancies, listed here for convenience: * [progressbar2](http://progressbar-2.readthedocs.io/en/latest/) * [xlrd](https://github.com/python-excel/xlrd) @@ -99,7 +98,7 @@ You can run the same checks manually without a commit by executing the following in the base of the clone: ```bash -./run_tests.py +./run_tests.psh ``` ### Development Dependencies From 9b85d81cbd4b93e5de21ec2d136b2e2edaf3b70e Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 11 Oct 2017 14:08:04 +0100 Subject: [PATCH 27/37] Use a common parser for version --- cgp_seq_input_val/command_line.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cgp_seq_input_val/command_line.py b/cgp_seq_input_val/command_line.py index 38a3a07..a6dcbbb 100644 --- a/cgp_seq_input_val/command_line.py +++ b/cgp_seq_input_val/command_line.py @@ -17,18 +17,22 @@ def main(): """ Sets up the parser and handles triggereing of correct sub-command """ - parser = argparse.ArgumentParser(prog='cgpSeqInputVal') + common_parser = argparse.ArgumentParser('parent', add_help=False) + common_parser.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + version) + + parser = argparse.ArgumentParser(prog='cgpSeqInputVal', parents=[common_parser]) + subparsers = parser.add_subparsers(help='sub-command help') # create the parser for the "man-norm" command parser_a = subparsers.\ add_parser('man-norm', + parents=[common_parser], description='Convert manifest files to common denominator (tsv)', epilog='Input can be [xls|xlsx|csv|tsv]. \ "tsv" is just copied to maintain tool-chain') - parser_a.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + version) parser_a.add_argument('-i', '--input', dest='input', metavar='FILE', @@ -48,10 +52,8 @@ def main(): # create the parser for the "man-valid" command parser_b = subparsers.add_parser('man-valid', + parents=[common_parser], description='Validate a tsv import manifest file') - parser_b.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + version) parser_b.add_argument('-i', '--input', dest='input', metavar='FILE', @@ -74,10 +76,8 @@ def main(): # create the parser for the "seq-valid" command parser_c = subparsers.add_parser('seq-valid', + parents=[common_parser], description='Validates up to 2 sequencing data files.') - parser_c.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + version) parser_c.add_argument('-r', '--report', dest='report', type=argparse.FileType('w'), From 53fabb416dba10d8c8de8ef713eaf3ce76073cee Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:37:38 +0100 Subject: [PATCH 28/37] Add licence --- LICENSE | 661 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 661 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..dbbe355 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. 
From ccd881f1abd4fd75c1f6a4aae6ebe51cb9232d7e Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:50:00 +0100 Subject: [PATCH 29/37] Change test suite used in travis --- .travis.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1989da4..7b1e4c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,22 +9,21 @@ env: language: python python: + - "3.3" - "3.6" install: - - pip install nose coverage + - pip install pytest pytest-cov - pip install progressbar2 - pip install xlrd - before_script: - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter - chmod +x ./cc-test-reporter - ./cc-test-reporter before-build script: - - nosetests --with-coverage --cover-package=cgp_seq_input_val - - coverage xml + - pytest --cov-branch --cov-report term --cov=cgp_seq_input_val after_script: - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT From e8bc0b4f2b8b618f6ee5611a167efc8b560f6099 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:50:39 +0100 Subject: [PATCH 30/37] Add branch coverage and change linter --- run_tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 1b8caca..5a829c3 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,14 +1,14 @@ #!/usr/bin/env bash set -e -pytest --cov-report term --cov-report html --cov=cgp_seq_input_val --cov-fail-under=50 +pytest --cov-branch --cov-report term --cov-report html --cov=cgp_seq_input_val --cov-fail-under=50 set +e # these should not die: -echo -e "\n##########################" -echo "# Running pep8 (style) #" -echo "##########################" -pep8 --format=pylint cgp_seq_input_val +echo -e "\n#################################" +echo "# Running pycodestyle (style) #" +echo "#################################" +pycodestyle cgp_seq_input_val echo -e "\n#########################################" echo "# Running radon (cyclomatic complexity) #" From 8247a73c137feb2bd814a8d8069ec6f80001832f Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:50:56 +0100 Subject: [PATCH 31/37] Docs and deps --- README.md | 22 ++++++++++++++++++++++ setup.py | 7 ++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f31f3ca..78f551f 100644 --- a/README.md +++ b/README.md @@ -141,3 +141,25 @@ $ scp cgp_seq_input_val-1.1.0-py3-none-any.whl user@host:~/wheels # on host $ pip install --find-links=~/wheels cgp_seq_input_val ``` + + +LICENCE +======== +Copyright (c) 2017 Genome Research Ltd. + +Author: CancerIT + +This file is part of cgp_seq_input_val. + +cgp_seq_input_val is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
diff --git a/setup.py b/setup.py index 9408ecc..d6fd874 100755 --- a/setup.py +++ b/setup.py @@ -6,8 +6,8 @@ 'name': 'cgp_seq_input_val', 'description': 'Code to validate manifests and raw seq data', 'author': 'Keiran M Raine', - 'url': 'https://gitlab.internal.sanger.ac.uk/CancerIT/cgp_seq_input_val', - 'download_url': 'Where to download it.', + 'url': 'https://github.com/cancerit/cgp_seq_input_val', + 'download_url': '', 'author_email': 'cgphelp@sanger.ac.uk', 'version': '1.1.0', 'python_requires': '>= 3.3', @@ -17,7 +17,8 @@ 'package_data': {'cgp_seq_input_val': ['config/*.json']}, 'entry_points': { 'console_scripts': ['cgpSeqInputVal=cgp_seq_input_val.command_line:main'], - } + }, + 'metadata': {'license_file': 'LICENSE'}, } setup(**config) From 1e13441fe4f046e730c5fe7a8b17e82bb9bf0950 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:54:20 +0100 Subject: [PATCH 32/37] py3.3 needs additional package to use pytest --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 7b1e4c9..6f98e74 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,11 @@ env: global: - CC_TEST_REPORTER_ID=24af7ff1e88f3b70c8b9a5280ce9604d561dacd8eaa7b1d895128ca2bd724beb +addons: + apt: + packages: + - python-logilab-common + language: python python: From 54b0413ab2b7f2b609d459db15085e55daf8c5dd Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 20:54:46 +0100 Subject: [PATCH 33/37] add comment to package --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6f98e74..9fca16d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ env: addons: apt: packages: - - python-logilab-common + - python-logilab-common # only for py3.3 language: python From 75d54864534662339886e16bdc6e775580dd5a52 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 17 Oct 2017 21:28:46 +0100 Subject: [PATCH 34/37] use 3.4 as minimum --- .travis.yml | 7 +------ setup.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9fca16d..3734842 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,15 +6,10 @@ env: global: - CC_TEST_REPORTER_ID=24af7ff1e88f3b70c8b9a5280ce9604d561dacd8eaa7b1d895128ca2bd724beb -addons: - apt: - packages: - - python-logilab-common # only for py3.3 - language: python python: - - "3.3" + - "3.4" - "3.6" install: diff --git a/setup.py b/setup.py index d6fd874..3f8cf95 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'download_url': '', 'author_email': 'cgphelp@sanger.ac.uk', 'version': '1.1.0', - 'python_requires': '>= 3.3', + 'python_requires': '>= 3.4', 'setup_requires': ['pytest'], 'install_requires': ['progressbar2', 'xlrd'], 'packages': ['cgp_seq_input_val'], From 109ef3a00b4a1ec16f8c928f4ebb2e70d2476090 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Thu, 19 Oct 2017 14:35:58 +0100 Subject: [PATCH 35/37] Add markdown linter and cleanup --- README.md | 10 +++++++--- run_tests.sh | 5 +++++ setup.py | 1 - 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 78f551f..fabac3a 100644 --- a/README.md +++ b/README.md @@ -123,9 +123,14 @@ pip install pytest pip install pytest-cov pip install pep8 pip install radon +gem install --user-install mdl ``` -__Also see__ [Package Dependancies](#package-dependancies) +Test that `mdl` is available, if not add the following to your path variable: + +``` +export PATH=$HOME/.gem/ruby/X.X.X/bin:$PATH +``` ### Cutting a release @@ -142,9 +147,8 @@ $ scp 
cgp_seq_input_val-1.1.0-py3-none-any.whl user@host:~/wheels $ pip install --find-links=~/wheels cgp_seq_input_val ``` +## LICENCE -LICENCE -======== Copyright (c) 2017 Genome Research Ltd. Author: CancerIT diff --git a/run_tests.sh b/run_tests.sh index 5a829c3..1f4c918 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -20,4 +20,9 @@ echo "# Running radon (maintainability index) #" echo "#########################################" radon mi -s cgp_seq_input_val +echo -e "\n##############################" +echo "# Running mdl (markdownlint) #" +echo "##############################" +mdl . + exit 0 # don't die based on assements of code quality diff --git a/setup.py b/setup.py index 3f8cf95..b58d10c 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,6 @@ 'entry_points': { 'console_scripts': ['cgpSeqInputVal=cgp_seq_input_val.command_line:main'], }, - 'metadata': {'license_file': 'LICENSE'}, } setup(**config) From 0ec9b67f68614be98a0021730f60967d78ceb561 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Thu, 19 Oct 2017 14:37:27 +0100 Subject: [PATCH 36/37] Fixes typo spotted during pre merge checking --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fabac3a..0781734 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ You can run the same checks manually without a commit by executing the following in the base of the clone: ```bash -./run_tests.psh +./run_tests.sh ``` ### Development Dependencies From 281a1678a6183cf53a93c65ed68b58349037f249 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 22 Nov 2017 14:34:16 +0000 Subject: [PATCH 37/37] Version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b58d10c..7746631 100755 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ 'url': 'https://github.com/cancerit/cgp_seq_input_val', 'download_url': '', 'author_email': 'cgphelp@sanger.ac.uk', - 'version': '1.1.0', + 'version': '1.2.0', 'python_requires': '>= 3.4', 'setup_requires': ['pytest'], 'install_requires': ['progressbar2', 'xlrd'],