-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrouper.py
32 lines (27 loc) · 1010 Bytes
/
grouper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from __future__ import print_function
import argparse
import sys
def read_sample(path):
    """Return the lines of the text file at *path* as a list of strings.

    Leading and trailing whitespace of the file as a whole is stripped
    before splitting, so an empty file yields an empty list.  Individual
    lines are not stripped.
    """
    with open(path, 'r') as handle:
        return handle.read().strip().splitlines()


def group_transcripts(samples, require_samples=None):
    """Return the sorted transcripts that occur in enough samples.

    Args:
        samples: A sequence of samples, each an iterable of transcript
            names (one sample per input file).
        require_samples: Minimum number of samples a transcript must
            appear in to be kept.  When None, a transcript must appear
            in every sample.

    Returns:
        A sorted list of the transcript names meeting the threshold.
    """
    # A transcript can be in at most len(samples) samples, so "present in
    # all of them" is the same test as ">= len(samples)".  Comparing by
    # value (not with ``is``) keeps this correct for any number of files.
    threshold = len(samples) if require_samples is None else require_samples

    counts = {}
    for sample in samples:
        # set() so a name repeated inside one file counts that file once.
        for gene in set(sample):
            counts[gene] = counts.get(gene, 0) + 1

    return [gene for gene in sorted(counts) if counts[gene] >= threshold]


def main(argv=None):
    """Parse the command line, read each file and print the grouped list.

    Args:
        argv: Argument list to parse; None lets argparse read sys.argv.
    """
    parser = argparse.ArgumentParser(description='Groups transcript lists together.')
    parser.add_argument('fnames', metavar='File', type=str, nargs='+',
                        help='The names of the files which you would like to overlap')
    parser.add_argument('--require-samples', metavar='#', type=int, nargs='?',
                        help='The number of samples to require a transcript appear in to be included in the output list. Default: number of files given.')
    args = parser.parse_args(argv)

    # Progress goes to stderr so stdout carries only the transcript list.
    print("Processing files...", args.fnames, file=sys.stderr)

    samples = [read_sample(fname) for fname in args.fnames]
    for gene in group_transcripts(samples, args.require_samples):
        print(gene)


if __name__ == '__main__':
    main()