-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocessing.py
65 lines (52 loc) · 2.44 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from urllib.request import urlretrieve
import zipfile
import argparse
from decompose_volume import decompose
from normalise import normalise
description = """
Decorrelates fluorescence values and estimated cell volume in
flow cytometry data.
"""

# Default archive holding the published example data set.
default_url = 'https://data.ncl.ac.uk/ndownloader/articles/12073479/versions/1'

parser = argparse.ArgumentParser(description=description, prog='FlowScatt')
# --download takes an optional URL; bare "--download" falls back to the
# published default archive, and omitting the flag leaves url == ''.
parser.add_argument('--download', dest='url', type=str, nargs='?',
                    const=default_url, default='',
                    help='the URL of the zipped data files to retrieve.')
# NOTE: the options below previously used nargs=1, which makes argparse
# store a one-element *list* when the flag is supplied while the defaults
# are plain strings — so open(OUTFILE)/open(DESC_FILE) crashed whenever
# the user actually passed the option.  Dropping nargs stores a plain
# string in both cases.
parser.add_argument('--data-dir', type=str,
                    default='./FCS/', metavar='DIRECTORY',
                    help='the directory in which to store the data.')
parser.add_argument('-f', '--description-file', type=str,
                    default='file_description.csv', metavar='FILENAME',
                    help='the file which describes the fcs file\'s data.')
parser.add_argument('-c', '--channels', nargs=2, metavar='CHANNEL',
                    default=['FSC_H', 'GFP_H'],
                    help='the channels to use for the analysis.')
parser.add_argument('--min-to-back', action='store_true',
                    default=False,
                    help='''If present, normalisation is performed using
                            the minimum fluorescence value, not the
                            autofluorescence plasmid''')
parser.add_argument('-o', '--outfile', type=str, metavar='FILENAME',
                    default='standardised.csv',
                    help='the file path to write the processed data to.')
def download_and_extract(url, dest_dir=None):
    """Download a zip archive from ``url`` and unpack it.

    The archive is saved to ``FCS.zip`` in the current working directory
    and then extracted.

    Parameters
    ----------
    url : str
        Location of the zipped data files (anything ``urlretrieve``
        accepts, e.g. an http(s) or file URL).
    dest_dir : str, optional
        Directory to extract into.  Defaults to the module-level
        ``DATA_DIR`` (set from the command line in the ``__main__``
        block), preserving the original call signature and behaviour.
    """
    if dest_dir is None:
        dest_dir = DATA_DIR  # global defined in the __main__ block below
    print("starting download, this may take some time")
    urlretrieve(url, "FCS.zip")
    # Fixed missing space in the original "downloaded,starting" message.
    print("files downloaded, starting unpacking")
    with zipfile.ZipFile("FCS.zip", 'r') as zip_ref:
        zip_ref.extractall(dest_dir)
if __name__ == '__main__':
    # Resolve all command-line options once, up front.
    args = parser.parse_args()
    URL = args.url
    DATA_DIR = args.data_dir
    CHANNELS = args.channels
    DESC_FILE = args.description_file
    MIN_TO_BACK = args.min_to_back
    OUTFILE = args.outfile

    # Fetch and unpack the raw FCS archive only when a URL was supplied.
    if URL:
        download_and_extract(URL)

    # Stage 1: decompose the raw channel data into an intermediate CSV.
    with open('volume_decomposed.csv', 'w', newline='') as outfile:
        decompose(outfile, DESC_FILE, DATA_DIR, CHANNELS)

    # Stage 2: normalise the decomposed values into the final output file.
    with open('volume_decomposed.csv') as infile, \
            open(OUTFILE, 'w', newline='') as outfile:
        normalise(infile, outfile, min_to_back=MIN_TO_BACK)