From 29292c0393ab28fa1e042cffb15bf6cd357d21eb Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Mon, 13 Jan 2025 23:58:52 +0800 Subject: [PATCH 1/2] fixed empty spectra bugs --- pyproject.toml | 2 +- quantmsrescore/__init__.py | 2 +- quantmsrescore/ms2rescore.py | 107 +++++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 44 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c620b2f..f1cce66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "quantms-rescoring" description = "quantms-rescoring: Python scripts and helpers for the quantMS workflow" readme = "README.md" license = "MIT" -version = "0.0.3" +version = "0.0.4" authors = [ "Yasset Perez-Riverol ", "Dai Chengxin ", diff --git a/quantmsrescore/__init__.py b/quantmsrescore/__init__.py index 27fdca4..81f0fde 100644 --- a/quantmsrescore/__init__.py +++ b/quantmsrescore/__init__.py @@ -1 +1 @@ -__version__ = "0.0.3" +__version__ = "0.0.4" diff --git a/quantmsrescore/ms2rescore.py b/quantmsrescore/ms2rescore.py index 6ae7af3..bb796ae 100644 --- a/quantmsrescore/ms2rescore.py +++ b/quantmsrescore/ms2rescore.py @@ -5,6 +5,7 @@ import importlib.resources import json import logging +import os.path import click import pyopenms as oms @@ -141,24 +142,24 @@ def _parse_psm( def parse_cli_arguments_to_config( - config_file: str = None, - feature_generators: str = None, - ms2pip_model_dir: str = None, - ms2pip_model: str = None, - ms2_tolerance: float = None, - calibration_set_size: float = None, - rescoring_engine: str = None, - rng: int = None, - test_fdr: float = None, - processes: int = None, - spectrum_path: str = None, - fasta_file: str = None, - id_decoy_pattern: str = None, - lower_score_is_better: bool = None, - output_path: str = None, - log_level: str = None, - spectrum_id_pattern: str = None, - psm_id_pattern: str = None + config_file: str = None, + feature_generators: str = None, + ms2pip_model_dir: str = None, + ms2pip_model: str = None, + ms2_tolerance: float = None, + calibration_set_size: float = None, + rescoring_engine: str = None, + rng: int = None, + test_fdr: float = None, + processes: int = None, + spectrum_path: str = None, + fasta_file: str = None, + id_decoy_pattern: str = None, + lower_score_is_better: bool = None, + output_path: str = None, + log_level: str = None, + spectrum_id_pattern: str = None, + psm_id_pattern: str = None ) -> dict: if config_file is None: config = json.load( @@ -254,6 +255,31 @@ def rescore_idxml(input_file, output_file, config) -> None: peptide_ids = reader.new_peptide_ids else: peptide_ids = reader.peptide_ids + + # check if any spectrum is empty + exp = oms.MSExperiment() + oms.MzMLFile().load(config["ms2rescore"]["spectrum_path"], exp) + empty_spectra = 0 + spec = [] + for spectrum in exp: + peaks_tuple = spectrum.get_peaks() + if len(peaks_tuple[0]) == 0 and spectrum.getMSLevel() == 2: + logging.warning( + f"{spectrum.getNativeID()} spectra don't have spectra information!" + ) + empty_spectra += 1 + continue + spec.append(spectrum) + + if empty_spectra != 0: + logging.warning( + f"Removed {empty_spectra} spectra without spectra information!" + ) + exp.setSpectra(spec) + mzml_output = os.path.splitext(os.path.basename(config["ms2rescore"]["spectrum_path"]))[0] + "_clear.mzML" + oms.MzMLFile().store(mzml_output, exp) + config["ms2rescore"]["spectrum_path"] = mzml_output + # Rescore rescore(config, psm_list) @@ -266,7 +292,7 @@ def rescore_idxml(input_file, output_file, config) -> None: def filter_out_artifact_psms( - psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification] + psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification] ) -> List[oms.PeptideIdentification]: """Filter out PeptideHits that could not be processed by all feature generators""" num_mandatory_features = max([len(psm.rescoring_features) for psm in psm_list]) @@ -424,26 +450,26 @@ def filter_out_artifact_psms( ) @click.pass_context def ms2rescore( - ctx, - psm_file: str, - spectrum_path, - output_path: str, - log_level, - processes, - fasta_file, - test_fdr, - feature_generators, - ms2pip_model_dir, - ms2pip_model, - ms2_tolerance, - calibration_set_size, - rescoring_engine, - rng, - id_decoy_pattern, - lower_score_is_better, - config_file: str, - spectrum_id_pattern: str, - psm_id_pattern: str + ctx, + psm_file: str, + spectrum_path, + output_path: str, + log_level, + processes, + fasta_file, + test_fdr, + feature_generators, + ms2pip_model_dir, + ms2pip_model, + ms2_tolerance, + calibration_set_size, + rescoring_engine, + rng, + id_decoy_pattern, + lower_score_is_better, + config_file: str, + spectrum_id_pattern: str, + psm_id_pattern: str ): """ Rescore PSMs in an idXML file and keep other information unchanged. @@ -505,6 +531,3 @@ def ms2rescore( logging.info("MSĀ²Rescore config:") logging.info(config) rescore_idxml(psm_file, output_path, config) - - - From 0b0258c6cb2a702dc0648d6bd5c7986071b8a741 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:00:39 +0800 Subject: [PATCH 2/2] Update ms2rescore.py --- quantmsrescore/ms2rescore.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/quantmsrescore/ms2rescore.py b/quantmsrescore/ms2rescore.py index bb796ae..1e1229c 100644 --- a/quantmsrescore/ms2rescore.py +++ b/quantmsrescore/ms2rescore.py @@ -5,8 +5,7 @@ import importlib.resources import json import logging -import os.path - +import os import click import pyopenms as oms from ms2rescore import package_data, rescore