Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed read-bioinfo-metadata module #367

Merged
merged 31 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
da8d7c0
Fixed path to files not containing sample name in basename
svarona Jan 10, 2025
8eede46
Added batchID/date to filenames instead of date
svarona Jan 13, 2025
ea5a947
add current date to split files
svarona Jan 13, 2025
d7a1516
add year to outdir path
svarona Jan 13, 2025
48a6185
test whether samples correspond to same batch date or not
svarona Jan 13, 2025
ffbf598
Added function to save split files to outdir
svarona Jan 13, 2025
2b0447e
test if bioinfo_lab_metadata exist, if exists merge
svarona Jan 13, 2025
1d30be2
Created code to merge metadata if already exist
svarona Jan 13, 2025
e9b04da
created function to search files containing multiple samples
svarona Jan 13, 2025
f61e538
test whether file exist and try to merge
svarona Jan 13, 2025
f226a8c
fixed black
svarona Jan 13, 2025
b37e8a4
Fixed flake8
svarona Jan 13, 2025
97d944e
fixed black
svarona Jan 14, 2025
e10a094
added creation of log in batch folder
svarona Jan 14, 2025
6864e95
fixed regex to find long table in analysis_results
svarona Jan 14, 2025
f58e073
removed log that was not working
svarona Jan 14, 2025
718e66b
split files by batch first
svarona Jan 14, 2025
72c76b3
created unique suffix for all files in batch
svarona Jan 14, 2025
68f6b98
renamed save_splitted_files to save_merged_files
svarona Jan 14, 2025
0d82573
added code to save split long table to batch dir
svarona Jan 14, 2025
09b58da
fixed black
svarona Jan 14, 2025
f08c12a
added log errors
svarona Jan 14, 2025
d09ca26
replaced batch_id with batch_date
svarona Jan 14, 2025
55bd611
replaced batch_id with batch_date
svarona Jan 14, 2025
a480315
Updated changelog
svarona Jan 14, 2025
d6cd9be
Added creation of analysis_results earlier for module to work
svarona Jan 15, 2025
2af25d7
ignored flake error
svarona Jan 15, 2025
a68a3eb
fixed black and flake
svarona Jan 15, 2025
97be573
fixed black and flake
svarona Jan 15, 2025
6c2556e
finally fixed flake8
svarona Jan 15, 2025
e90d33d
removed print
svarona Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Code contributions to the release:

#### Fixes

- Fixed read-bioinfo-metadata module [#367](https://github.com/BU-ISCIII/relecov-tools/pull/367)

#### Changed

#### Removed
Expand Down
66 changes: 50 additions & 16 deletions relecov_tools/assets/pipeline_utils/viralrecon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os.path

from pathlib import Path
from datetime import datetime

import relecov_tools.utils
from relecov_tools.config_json import ConfigJson
Expand Down Expand Up @@ -135,7 +134,7 @@ def convert_to_json(self, samp_dict):
j_list = []
# Grab date from filename
result_regex = re.search(
"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
"variants_long_table(?:_\d{14})?\.csv", os.path.basename(self.file_path)
)
if result_regex is None:
stderr.print(
Expand All @@ -153,18 +152,53 @@ def convert_to_json(self, samp_dict):
j_list.append(j_dict)
return j_list

def save_to_file(self, j_list):
def save_to_file(self, j_list, batch_date):
"""Transform the parsed data into a json file"""
date_now = datetime.now().strftime("%Y%m%d%H%M%S")
file_name = "long_table_" + date_now + ".json"
file_name = "long_table_" + batch_date + ".json"
file_path = os.path.join(self.output_directory, file_name)

try:
with open(file_path, "w") as fh:
fh.write(json.dumps(j_list, indent=4))
stderr.print("[green]\tParsed data successfully saved to file:", file_path)
except Exception as e:
stderr.print("[red]\tError saving parsed data to file:", str(e))
if os.path.exists(file_path):
stderr.print(
f"[blue]Long table {file_path} file already exists. Merging new data if possible."
)
log.info(
"Long table %s file already exists. Merging new data if possible."
% file_path
)
original_table = relecov_tools.utils.read_json_file(file_path)
samples_indict = {item["sample_name"]: item for item in original_table}
for item in j_list:
sample_name = item["sample_name"]
if sample_name in samples_indict:
if samples_indict[sample_name] != item:
stderr.print(
f"[red]Same sample {sample_name} has different data in both long tables."
)
log.error(
"Sample %s has different data in %s and new long table. Can't merge."
% (sample_name, file_path)
)
return None
else:
original_table.append(item)
try:
with open(file_path, "w") as fh:
fh.write(json.dumps(original_table, indent=4))
stderr.print(
"[green]\tParsed data successfully saved to file:", file_path
)
except Exception as e:
stderr.print("[red]\tError saving parsed data to file:", str(e))
log.error("Error saving parsed data to file: %s", e)
else:
try:
with open(file_path, "w") as fh:
fh.write(json.dumps(j_list, indent=4))
stderr.print(
"[green]\tParsed data successfully saved to file:", file_path
)
except Exception as e:
stderr.print("[red]\tError saving parsed data to file:", str(e))
log.error("Error saving parsed data to file: %s", e)

def parsing_csv(self):
"""
Expand All @@ -180,7 +214,7 @@ def parsing_csv(self):


# START util functions
def handle_pangolin_data(files_list, output_folder=None):
def handle_pangolin_data(files_list, batch_date, output_folder=None):
"""File handler to parse pangolin data (csv) into JSON structured format.

Args:
Expand Down Expand Up @@ -320,7 +354,7 @@ def get_pango_data_version(files_list):
return pango_data_processed


def parse_long_table(files_list, output_folder=None):
def parse_long_table(files_list, batch_date, output_folder=None):
"""File handler to retrieve data from long table files and convert it into a JSON structured format.
This function utilizes the LongTableParse class to parse the long table data.
Since this utility handles and maps data in a custom way, it returns None to avoid being transferred to method read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
Expand Down Expand Up @@ -349,7 +383,7 @@ def parse_long_table(files_list, output_folder=None):
# Parsing long table data and saving it
long_table_data = long_table.parsing_csv()
# Saving long table data into a file
long_table.save_to_file(long_table_data)
long_table.save_to_file(long_table_data, batch_date)
stderr.print("[green]\tProcess completed")
elif len(files_list) > 1:
method_log_report.update_log_report(
Expand All @@ -361,7 +395,7 @@ def parse_long_table(files_list, output_folder=None):
return None


def handle_consensus_fasta(files_list, output_folder=None):
def handle_consensus_fasta(files_list, batch_date, output_folder=None):
"""File handler to parse consensus data (fasta) into JSON structured format.

Args:
Expand Down
4 changes: 4 additions & 0 deletions relecov_tools/conf/bioinfo_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"header_row_idx": 1,
"required": true,
"function": null,
"multiple_samples": true,
"split_by_batch": true,
"content": {
"analysis_date": "analysis_date",
Expand Down Expand Up @@ -44,6 +45,7 @@
"sample_col_idx": 1,
"header_row_idx": 1,
"required": true,
"multiple_samples": true,
"split_by_batch": true,
"function": "parse_long_table",
"content": {
Expand Down Expand Up @@ -84,6 +86,7 @@
"fn": "summary_variants_metrics_mqc.csv",
"sample_col_idx": 1,
"header_row_idx": 1,
"multiple_samples": true,
"required": true,
"function": null,
"content": {
Expand All @@ -100,6 +103,7 @@
"workflow_summary": {
"fn": "multiqc_report.html",
"required": true,
"multiple_samples": true,
"function": null,
"content": {
"software_version": {
Expand Down
Loading
Loading