Skip to content

Commit

Permalink
Merge pull request #178 from CDCgov/samplesheet_check_metadata_ufp0
Browse files Browse the repository at this point in the history
Samplesheet check metadata ufp0
  • Loading branch information
kyleoconnell authored Mar 7, 2024
2 parents ec0bd38 + 3159eb1 commit 6b9f719
Show file tree
Hide file tree
Showing 30 changed files with 178 additions and 449 deletions.
Binary file modified assets/sample_metadata/Cdiphtheriae_test_1.xlsx
Binary file not shown.
Binary file modified assets/sample_metadata/VARV_metadata_Sample_Run_1.xlsx
Binary file not shown.
Binary file not shown.
13 changes: 8 additions & 5 deletions bin/validate_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ def metadata_validation_main():
sample_df = final_df.iloc[row].to_frame().transpose()
sample_df = sample_df.set_index('sequence_name')
sample_dfs[final_df.iloc[row]['sequence_name']] = sample_df
# now export the .xlsx file as a .tsv
# now export the .xlsx file as a .tsv and csv
for sample in sample_dfs.keys():
tsv_file = f'{parameters["output_dir"]}/{parameters["file_name"]}/tsv_per_sample/{sample}.tsv'
sample_dfs[sample].to_csv(tsv_file, sep="\t")
# *** Added this to export to csv as well *** #
csv_file = f'{parameters["output_dir"]}/{parameters["file_name"]}/tsv_per_sample/{sample}.csv'
sample_dfs[sample].to_csv(csv_file)
print(f'\nMetadata Validation was Successful!!!\n')
else:
print(f'\nMetadata Validation Failed Please Consult : {parameters["output_dir"]}/{parameters["file_name"]}/errors/full_error.txt for a Detailed List\n')
Expand Down Expand Up @@ -957,7 +960,8 @@ def change_col_names(self):

# todo: this is a temporary fx to convert the illumina paths as input to seqsender
def change_illumina_paths(self):
""" Change illumina_sra_file_path_1 & illumina_sra_file_path_2 to sra-file_name
""" Create sra-file_name from illumina_sra_file_path_1 & illumina_sra_file_path_2
Rename illumina_sra_file_path_1 & illumina_sra_file_path_2 to fastq_path_1 & fastq_path_2
"""

# function to extract file name from path
Expand All @@ -970,10 +974,9 @@ def extract_filename(path):
# create new column 'sra-file_name'
self.filled_df['sra-file_name'] = self.filled_df.apply(lambda row: extract_filename(row['illumina_sra_file_path_1']) + ',' + extract_filename(row['illumina_sra_file_path_2']), axis=1)

# drop original columns
self.filled_df = self.filled_df.drop(['illumina_sra_file_path_1', 'illumina_sra_file_path_2'], axis=1)
# rename original columns
self.filled_df = self.filled_df.rename(columns={'illumina_sra_file_path_1': 'fastq_path_1', 'illumina_sra_file_path_2': 'fastq_path_2'})


# todo: apply this function to Ankush's insert checks as well
def check_nan_for_column(self, column_name):
""" Check for NaN values (if not a string) in a column of the dataframe """
Expand Down
21 changes: 19 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,34 @@
process {

    // Default publish location for every process: a sub-directory of params.output_dir named after
    // the lower-cased first '_'-separated token of the simple (un-namespaced) process name.
    // 'versions.yml' is never published (saveAs returns null for it).
    publishDir = [
        path: { "${params.output_dir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
        mode: params.publish_dir_mode,
        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
    ]

    // Metadata validation publishes straight into the top-level output directory.
    withName: METADATA_VALIDATION {
        publishDir = [
            path: { "${params.output_dir}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    withName: BAKTA {
        publishDir = [
            // The sub-directory has to sit outside the ${...} expression: inside it, Groovy would
            // evaluate `params.output_dir / bakta_outputs` (a division by an undefined variable)
            // instead of building a path.
            path: { "${params.output_dir}/bakta_outputs" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    withName: VADR_POST_CLEANUP {
        publishDir = [
            path: { "${params.output_dir}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

}
25 changes: 25 additions & 0 deletions modules/local/extract_inputs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// EXTRACT_INPUTS
// Takes a tab-separated sample sheet (tsvFile) and, per the logic written below, is meant to
// yield one (sequence_name, fasta, fastq_1, fastq_2) tuple per data row:
//   - fasta is taken from the `fasta_path` column when that column exists, otherwise null
//   - fastq_1 / fastq_2 come from splitting the `sra-file_name` column on ','; an empty entry
//     (or a missing column) becomes null
// NOTE(review): as written this does not look runnable - see the notes on `output:` and
// `script:` below. Confirm whether this process is actually wired into a workflow.
process EXTRACT_INPUTS {
input:
// The per-sample metadata sheet to parse.
file tsvFile

output:
// NOTE(review): `into <channel>` is DSL1 channel syntax, while other modules in this change
// declare outputs with `emit:` - confirm the intended DSL version. The second positional
// argument `true` passed to path(...) is also not explained anywhere in view - TODO confirm.
tuple val(sequence_name), path(fasta, true), path(fastq_1, true), path(fastq_2, true) into samplesChannel

script:
// NOTE(review): the string below contains Groovy statements, but a `script:` string is handed
// to the task shell rather than evaluated as Groovy - presumably this logic was meant for an
// `exec:` block or for workflow-level channel operators (splitCsv / map); confirm.
// NOTE(review): `drop(1)` discards the first line before `header: true` consumes the next one
// as the header row - verify the sheet really has a line above its header.
"""
samples = file("${tsvFile}").readLines().drop(1).splitCsv(header: true, sep: '\t')
def input_files = []
samples.each { sample ->
sequence_name = sample.sequence_name
fasta = sample.containsKey('fasta_path') ? path(sample.fasta_path) : null
fastq = sample.containsKey('sra-file_name') ? sample['sra-file_name'].split(',') : ['', '']
fastq_1 = fastq[0] != '' ? path(fastq[0]) : null
fastq_2 = fastq[1] != '' ? path(fastq[1]) : null
input_files << tuple(sequence_name, fasta, fastq_1, fastq_2)
}
emit input_files
"""
}
6 changes: 6 additions & 0 deletions modules/local/general_util/check_files/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
*/
process CHECK_FILES {

// label 'main'

// Resolved merge conflict: both sides declared the same conda environment and the same
// container image, so a single copy of each directive is kept.
conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
    'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

input:
val signal
Expand Down
17 changes: 3 additions & 14 deletions modules/local/general_util/merge_upload_log/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,12 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
process MERGE_UPLOAD_LOG {

//label 'main'

conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' :
'staphb/tostadas:latest' }"

publishDir "$params.output_dir/$params.submission_output_dir/$annotation_name", mode: 'copy', overwrite: params.overwrite_output

if ( params.run_conda == true ) {
try {
conda params.env_yml
} catch (Exception e) {
System.err.println("WARNING: Unable to use conda env from $params.env_yml")
}
}
conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

input:
path submission_files
Expand Down
36 changes: 0 additions & 36 deletions modules/local/initial_submission/main.nf

This file was deleted.

8 changes: 4 additions & 4 deletions modules/local/initial_submission/main_full.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ process SUBMISSION_FULL {
'cdcgov/seqsender-dev' : 'cdcgov/seqsender-dev' }"

input:
tuple val(meta), path(validated_meta_path), path(fasta_path), path(annotations_path)
path(fastq_dir)
tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2), path(annotations_path)
path submission_config
val annotation_name

Expand All @@ -25,8 +24,9 @@ process SUBMISSION_FULL {

script:
"""
mkdir $meta.id
mv $fastq_dir $meta.id/raw_reads
mkdir -p $meta.id $meta.id/raw_reads
mv $fastq_1 $meta.id/raw_reads/
mv $fastq_2 $meta.id/raw_reads/
submission.py submit \
--genbank --sra --biosample \
Expand Down
3 changes: 1 addition & 2 deletions modules/local/initial_submission/main_genbank.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ process SUBMISSION_GENBANK {
'cdcgov/seqsender-dev' : 'cdcgov/seqsender-dev' }"

input:
tuple val(meta), path(validated_meta_path), path(fasta_path), path(annotations_path)
path(fastq_dir)
tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2), path(annotations_path)
path submission_config
val annotation_name

Expand Down
8 changes: 4 additions & 4 deletions modules/local/initial_submission/main_sra.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ process SUBMISSION_SRA {
'cdcgov/seqsender-dev' : 'cdcgov/seqsender-dev' }"

input:
tuple val(meta), path(validated_meta_path)
path(fastq_dir)
tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2)
path submission_config
val annotation_name

Expand All @@ -24,8 +23,9 @@ process SUBMISSION_SRA {

script:
"""
mkdir $meta.id
mv $fastq_dir $meta.id/raw_reads
mkdir -p $meta.id $meta.id/raw_reads
mv $fastq_1 $meta.id/raw_reads/
mv $fastq_2 $meta.id/raw_reads/
submission.py submit \
--sra \
Expand Down
7 changes: 3 additions & 4 deletions modules/local/liftoff_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
*/
process LIFTOFF {

label 'main'
// label 'main'

conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0' :
'quay.io/biocontainers/liftoff:1.6.3--pyhdfd78af_0'}"

'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output

input:
Expand Down
20 changes: 13 additions & 7 deletions modules/local/metadata_validation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,33 @@
*/
// METADATA_VALIDATION
// Runs validate_metadata.py on the supplied metadata file and exposes what it writes under
// params.val_output_dir: the per-sample .tsv and .csv files, the directory holding them, and
// the errors directory.
process METADATA_VALIDATION {

    // label 'main'

    //errorStrategy { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
    //maxRetries 5

    conda (params.enable_conda ? params.env_yml : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

    publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output

    input:
    // Not referenced by the script; presumably only sequences this process after an upstream
    // step - verify against the calling workflow.
    val signal
    // Metadata file passed to validate_metadata.py via --meta_path.
    path meta_path

    // Outputs are declared ahead of the script block (conventional section order).
    output:
    path "$params.val_output_dir/*/tsv_per_sample/*.tsv", emit: tsv_Files
    path "$params.val_output_dir/*/tsv_per_sample/*.csv", emit: csv_Files
    path "$params.val_output_dir/*/tsv_per_sample", emit: tsv_dir
    path "$params.val_output_dir/*/errors", emit: errors

    script:
    // The validation command is issued exactly once.
    """
    validate_metadata.py \
        --meta_path $meta_path \
        --output_dir $params.val_output_dir \
        --custom_fields_file $params.custom_fields_file \
        --validate_custom_fields $params.validate_custom_fields
    """
}
8 changes: 8 additions & 0 deletions modules/local/post_bakta_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
process BAKTA_POST_CLEANUP {

<<<<<<< HEAD
//label 'main'

conda (params.enable_conda ? params.env_yml : null)
Expand All @@ -13,6 +14,13 @@ process BAKTA_POST_CLEANUP {
'staphb/tostadas:latest' }"

publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output
=======
conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

// publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output
>>>>>>> 883756c... cleaned up containers and publish dirs

input:
path bakta_results
Expand Down
12 changes: 6 additions & 6 deletions modules/local/post_vadr_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
*/
process VADR_POST_CLEANUP {

//label 'main'
// label 'main'

conda (params.enable_conda ? params.env_yml : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' :
'staphb/tostadas:latest' }"
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"


publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output

input:
path vadr_outputs
// Each element of a tuple input must be written with parentheses; `path meta_path` without
// them is not parsed as a path qualifier inside a tuple.
tuple val(meta), path(meta_path)
tuple val(meta), path(fasta_path)

script:
"""
Expand Down
27 changes: 0 additions & 27 deletions modules/local/samplesheet_check/main.nf

This file was deleted.

Loading

0 comments on commit 6b9f719

Please sign in to comment.