Skip to content

Commit

Permalink
Revert "move to new IGO_ID column for kickoff"
Browse files Browse the repository at this point in the history
This reverts commit 80ad2f7.
  • Loading branch information
ionox0 committed Jan 29, 2020
1 parent 0545d22 commit 07e0d12
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 14 deletions.
4 changes: 0 additions & 4 deletions python_tools/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,13 @@
TITLE_FILE__SAMPLE_CLASS_COLUMN = "Class"
TITLE_FILE__BARCODE_INDEX_1_COLUMN = "Barcode_index_1"
TITLE_FILE__BARCODE_INDEX_2_COLUMN = "Barcode_index_2"
TITLE_FILE__RUN_ID_COLUMN = "Run_ID"
TITLE_FILE__LANE_COLUMN = "Lane"

# Columns inferred from other columns defined above
TITLE_FILE__SAMPLE_TYPE_COLUMN = "Sample_type"
TITLE_FILE__BAIT_VERSION_COLUMN = "Bait_version"

# Columns defined as constants
TITLE_FILE__IGO_ID_COLUMN = "IGO_ID"
TITLE_FILE__COLLAB_ID_COLUMN = "Collab_ID"

# Columns inferred from samplesheet metadata/operator column
Expand Down Expand Up @@ -202,7 +200,6 @@
TITLE_FILE__BARCODE_ID_COLUMN,
TITLE_FILE__POOL_COLUMN,
TITLE_FILE__SAMPLE_ID_COLUMN,
TITLE_FILE__IGO_ID_COLUMN,
TITLE_FILE__COLLAB_ID_COLUMN,
TITLE_FILE__PATIENT_ID_COLUMN,
TITLE_FILE__SAMPLE_CLASS_COLUMN,
Expand All @@ -214,7 +211,6 @@
TITLE_FILE__ACCESSION_COLUMN,
TITLE_FILE__BARCODE_INDEX_1_COLUMN,
TITLE_FILE__BARCODE_INDEX_2_COLUMN,
TITLE_FILE__RUN_ID_COLUMN,
TITLE_FILE__LANE_COLUMN,
]

Expand Down
4 changes: 1 addition & 3 deletions python_tools/legacy_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@
TITLE_FILE__BARCODE_ID_COLUMN = 'Barcode'
TITLE_FILE__POOL_COLUMN = 'Pool'
TITLE_FILE__SAMPLE_ID_COLUMN = 'Sample'
TITLE_FILE__IGO_ID_COLUMN = 'IGO_ID'
TITLE_FILE__COLLAB_ID_COLUMN = 'Collab_ID'
TITLE_FILE__PATIENT_ID_COLUMN = 'Patient_ID'
TITLE_FILE__CLASS_COLUMN = 'Class'
Expand All @@ -144,9 +143,8 @@
TITLE_FILE__SEX_COLUMN = 'Sex'
TITLE_FILE__BARCODE_INDEX_1_COLUMN = 'Barcode_index_1'
TITLE_FILE__BARCODE_INDEX_2_COLUMN = 'Barcode_index_2'
TITLE_FILE__RUN_ID_COLUMN = 'Run_ID'
TITLE_FILE__LANE_COLUMN = 'Lane'
TITLE_FILE__STUDY_ID_COLUMN = 'Study_ID'
TITLE_FILE__STUDY_ID_COLUMN = 'Study ID'


##########################
Expand Down
15 changes: 9 additions & 6 deletions python_tools/pipeline_kickoff/create_inputs_from_title_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def remove_missing_samples_from_title_file(title_file, fastq1, title_file_path):
# Todo: Should we instead raise an error and not continue?
"""
found_boolv = np.array([any([sample in f['path'] for f in fastq1]) for sample in title_file[TITLE_FILE__IGO_ID_COLUMN]])
samples_not_found = title_file.loc[~found_boolv, TITLE_FILE__IGO_ID_COLUMN]
found_boolv = np.array([any([sample in f['path'] for f in fastq1]) for sample in title_file[TITLE_FILE__COLLAB_ID_COLUMN]])
samples_not_found = title_file.loc[~found_boolv, TITLE_FILE__COLLAB_ID_COLUMN]

if samples_not_found.shape[0] > 0:
print(DELIMITER + 'Error: The following samples were missing either a read 1 fastq, read 2 fastq, or sample sheet. ' +
Expand All @@ -160,9 +160,9 @@ def remove_missing_fastq_samples(fastq1, fastq2, sample_sheet, title_file):
Todo: For the SampleSheet files, this relies on the parent folder containing the sample name
"""
fastq1 = filter(lambda f: any([sid in f['path'] for sid in title_file[TITLE_FILE__IGO_ID_COLUMN]]), fastq1)
fastq2 = filter(lambda f: any([sid in f['path'] for sid in title_file[TITLE_FILE__IGO_ID_COLUMN]]), fastq2)
sample_sheet = filter(lambda s: any([sid in s['path'] for sid in title_file[TITLE_FILE__IGO_ID_COLUMN]]), sample_sheet)
fastq1 = filter(lambda f: any([sid in f['path'] for sid in title_file[TITLE_FILE__COLLAB_ID_COLUMN]]), fastq1)
fastq2 = filter(lambda f: any([sid in f['path'] for sid in title_file[TITLE_FILE__COLLAB_ID_COLUMN]]), fastq2)
sample_sheet = filter(lambda s: any([sid in s['path'] for sid in title_file[TITLE_FILE__COLLAB_ID_COLUMN]]), sample_sheet)

return fastq1, fastq2, sample_sheet

Expand Down Expand Up @@ -472,14 +472,17 @@ def perform_validation(title_file, title_file_path, project_name):
1. Sample IDs / Collab IDs must be unique
2. Barcodes must be unique within each lane
3. Sample_type is in ['Plasma', 'Buffy Coat']
4. Sample Class is in ['Tumor', 'Normal']
4. Sample Class is in ['Tumor', 'Normal']
"""
if not project_name in title_file_path:
print('WARNING: project ID not found in title file path. Are you sure you are using the correct title file?')

if np.sum(title_file[TITLE_FILE__SAMPLE_ID_COLUMN].duplicated()) > 0:
raise Exception(DELIMITER + 'Duplicate sample IDs. Exiting.')

#if set([TITLE_FILE__COLLAB_ID_COLUMN]).issubset(title_file.columns) and np.sum(title_file[TITLE_FILE__COLLAB_ID_COLUMN].duplicated()) > 0:
# raise Exception(DELIMITER + 'Duplicate investigator sample IDs. Exiting.')

if np.sum(title_file[TITLE_FILE__SAMPLE_CLASS_COLUMN].isin(ALLOWED_SAMPLE_DESCRIPTION)) < len(title_file):
raise Exception(DELIMITER + 'Not all sample classes are in {}'.format(",".join(ALLOWED_SAMPLE_DESCRIPTION)))

Expand Down
2 changes: 1 addition & 1 deletion python_tools/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def contained_in(sample_id, fastq):
return 0

if use_cmo_sample_id:
boolv = title_file[TITLE_FILE__IGO_ID_COLUMN].apply(contained_in, fastq=sample_object)
boolv = title_file[MANIFEST__CMO_SAMPLE_ID_COLUMN].apply(contained_in, fastq=sample_object)
else:
# Samples from IGO will use the COLLAB_ID
boolv = title_file[TITLE_FILE__COLLAB_ID_COLUMN].apply(contained_in, fastq=sample_object)
Expand Down

0 comments on commit 07e0d12

Please sign in to comment.