Skip to content

Commit

Permalink
update pipeline outputs test
Browse files Browse the repository at this point in the history
  • Loading branch information
ionox0 committed Aug 3, 2018
1 parent 2039d10 commit 3e79b25
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 46 deletions.
Binary file modified innovation_pipeline.pdf
Binary file not shown.
4 changes: 4 additions & 0 deletions python_tools/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,10 @@
# Filename patterns for the collapsed BAMs and their BAI indexes.
# The dot before the extension is escaped so that it only matches a literal
# '.', (an unescaped '.' would also accept names such as '..._FXxbam').
UNFILTERED_BAM_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX\.bam$')
SIMPLEX_BAM_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-simplex\.bam$')
DUPLEX_BAM_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex\.bam$')
STANDARD_BAI_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR\.bai$')
UNFILTERED_BAI_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX\.bai$')
SIMPLEX_BAI_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-simplex\.bai$')
DUPLEX_BAI_SEARCH = re.compile(r'^.*_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex\.bai$')

TRIM_FILE_SEARCH = re.compile(r'^.*_cl\.stats$')
MARK_DUPLICATES_FILE_SEARCH = re.compile(r'^.*\.md_metrics$')
Expand Down
51 changes: 11 additions & 40 deletions python_tools/test/test_pipeline_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,6 @@
logger = logging.getLogger('outputs_test')



def substrings_in_list(substrings, list):
    """
    Tell whether at least one element of `list` contains every entry of `substrings`.

    :param: substrings Strings that must all occur within the same element
    :param: list Elements to search through
    :return: True if some single element contains all `substrings`, False otherwise
    """
    # The inner list is built in full before `all` sees it, so every
    # substring is tested against a candidate, exactly as a plain loop would.
    return any(
        all([needle in candidate for needle in substrings])
        for candidate in list
    )


class TestPipelineOutputs(unittest.TestCase):

output_dir = ''
Expand Down Expand Up @@ -84,25 +70,15 @@ def test_folders_have_all_correct_files(self):
assert 'first-pass.txt' in files
assert 'second-pass-alt-alleles.txt' in files

# All bams should be found
self.assertTrue(substring_in_list('__aln_srt_IR_FX.bam', files))
self.assertTrue(substring_in_list('__aln_srt_IR_FX.bai', files))
self.assertTrue(substring_in_list('__aln_srt_IR_FX-duplex.bam', files))
self.assertTrue(substring_in_list('__aln_srt_IR_FX-duplex.bai', files))
self.assertTrue(substring_in_list('__aln_srt_IR_FX-simplex-duplex.bam', files))
self.assertTrue(substring_in_list('__aln_srt_IR_FX-simplex-duplex.bai', files))
self.assertTrue(substring_in_list('_cl_aln_srt_MD_IR_FX_BR.bam', files))
self.assertTrue(substring_in_list('_cl_aln_srt_MD_IR_FX_BR.bai', files))

# All bams should be found, with correct sample_ids
self.assertTrue(substrings_in_list(['__aln_srt_IR_FX.bam', sample_id], files))
self.assertTrue(substrings_in_list(['__aln_srt_IR_FX.bai', sample_id], files))
self.assertTrue(substrings_in_list(['__aln_srt_IR_FX-duplex.bam', sample_id], files))
self.assertTrue(substrings_in_list(['__aln_srt_IR_FX-duplex.bai', sample_id], files))
self.assertTrue(substrings_in_list(['__aln_srt_IR_FX-simplex-duplex.bam', sample_id], files))
self.assertTrue(substrings_in_list(['__aln_srt_IR_FX-simplex-duplex.bai', sample_id], files))
self.assertTrue(substrings_in_list(['_cl_aln_srt_MD_IR_FX_BR.bam', sample_id], files))
self.assertTrue(substrings_in_list(['_cl_aln_srt_MD_IR_FX_BR.bai', sample_id], files))
self.assertTrue(substrings_in_list([STANDARD_BAM_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([STANDARD_BAI_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([UNFILTERED_BAM_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([UNFILTERED_BAI_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([SIMPLEX_BAM_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([SIMPLEX_BAI_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([DUPLEX_BAM_SEARCH, sample_id], files))
self.assertTrue(substrings_in_list([DUPLEX_BAI_SEARCH, sample_id], files))


def parse_arguments():
Expand All @@ -116,7 +92,8 @@ def parse_arguments():
parser.add_argument(
'-l',
'--log_level',
required=True
default='info',
required=False
)
args = parser.parse_args()

Expand All @@ -132,13 +109,7 @@ def setup_logging(args):
'critical': logging.CRITICAL
}

print('here')
if args.log_level:
print('here2')
log_level = LEVELS[args.log_level]
else:
log_level = logging.INFO

log_level = LEVELS[args.log_level]
logger.setLevel(log_level)
ch = logging.StreamHandler()
ch.setLevel(log_level)
Expand Down
32 changes: 27 additions & 5 deletions python_tools/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
from constants import *


# Type of a compiled regular expression, used by the search helpers below to
# tell regex arguments apart from plain strings.
# It is obtained at runtime from a throwaway compiled pattern rather than
# being named directly:
# https://stackoverflow.com/questions/6102019/type-of-compiled-regex-object-in-python
RETYPE = type(re.compile('duct_typing'))


def read_df(f, header=None):
"""
Helper to read our particular format of metrics files
Expand Down Expand Up @@ -79,20 +84,37 @@ def substring_in_list(substring, list):
:return: True / False if found / not found
"""

# We look for the regex class at runtime:
# https://stackoverflow.com/questions/6102019/type-of-compiled-regex-object-in-python
retype = type(re.compile('duct_typing'))

for elem in list:
if type(substring) == str:
if substring in elem:
return True
elif type(substring) == retype:
elif type(substring) == RETYPE:
if substring.match(elem):
return True
return False


def substrings_in_list(substrings, list):
    """
    Check to see that all elements from `substrings` can be found together in a single element of `list`

    Each entry of `substrings` is either a plain string, tested with `in`,
    or a compiled regex, tested with its `match` method (anchored at the
    start of the element).

    :param: substrings List of strings or compiled regexes
    :param: list List of elements to search through
    :return: True / False if all elements found / not found in single element from `list`.
        An empty `substrings` is trivially satisfied by any element, so the
        result is then True whenever `list` is non-empty.
    """
    def _found_in(substring, elem):
        # Compiled regexes are recognised by their `match` method, which
        # plain strings do not have.
        if hasattr(substring, 'match'):
            return substring.match(elem) is not None
        return substring in elem

    # Every entry must yield an explicit True/False: a regex that fails to
    # match has to count as a miss rather than being left out of the check.
    return any(
        all(_found_in(substring, elem) for substring in substrings)
        for elem in list
    )


def get_position_by_substring(tofind, list):
'''
Get index where `tofind` is a substring of the entry of `list`
Expand Down
1 change: 0 additions & 1 deletion workflows/innovation_pipeline.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,6 @@ steps:
# Make sample output directories #
##################################

# Todo: test that these directories are correctly created
make_bam_output_directories:
run: ../cwl_tools/expression_tools/make_sample_output_dirs.cwl
in:
Expand Down
1 change: 1 addition & 0 deletions workflows/marianas/collapsed_fastq_to_bam.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ steps:
PL: add_rg_PL
PU: add_rg_PU
CN: add_rg_CN
# Todo: this is not used
output_suffix: output_suffix
out: [output_sam]

Expand Down
1 change: 1 addition & 0 deletions workflows/marianas/marianas_collapsing_workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ steps:
add_rg_PU: add_rg_PU
add_rg_SM: add_rg_SM
add_rg_CN: add_rg_CN
# Todo: this is not used
output_suffix:
valueFrom: ${return '_MC_'}
out: [bam, bai]

0 comments on commit 3e79b25

Please sign in to comment.