diff --git a/Dockerfile b/Dockerfile index 2fde54a8..59fc4000 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,8 @@ ADD README.md /ms2rescore/README.md ADD MANIFEST.in /ms2rescore/MANIFEST.in ADD ms2rescore /ms2rescore/ms2rescore -RUN apt-get update && apt-get install python3-pip procps -y \ +RUN apt-get update \ + && apt-get install -y python3-pip procps libglib2.0-0 libsm6 libxrender1 libxext6 \ && rm -rf /var/lib/apt/lists/* \ && pip3 install ms2rescore/ diff --git a/ms2rescore/parse_psms.py b/ms2rescore/parse_psms.py index e116c32f..ce0c968d 100644 --- a/ms2rescore/parse_psms.py +++ b/ms2rescore/parse_psms.py @@ -41,13 +41,29 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList: ) logger.debug("Parsing modifications...") + modifications_found = set( + [ + re.search(r"\[([^\[\]]*)\]", x.proforma).group(1) + for x in psm_list["peptidoform"] + if "[" in x.proforma + ] + ) + logger.debug(f"Found modifications: {modifications_found}") + non_mapped_modifications = modifications_found - set(config["modification_mapping"].keys()) + if non_mapped_modifications: + logger.warning( + f"Non-mapped modifications found: {non_mapped_modifications}\nThis can be ignored if Unimod modification label" + ) psm_list.rename_modifications(config["modification_mapping"]) psm_list.add_fixed_modifications(config["fixed_modifications"]) psm_list.apply_fixed_modifications() - logger.debug("Applying `psm_id_pattern`...") if config["psm_id_pattern"]: pattern = re.compile(config["psm_id_pattern"]) + logger.debug(f"Applying `psm_id_pattern`...") + logger.debug( + f"Parsing `{psm_list['spectrum_id'][0]}` to `{_match_psm_ids(psm_list['spectrum_id'][0], pattern)}`" + ) new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]] psm_list["spectrum_id"] = new_ids @@ -125,6 +141,6 @@ def _match_psm_ids(old_id, regex_pattern): return match[1] except (TypeError, IndexError): raise MS2RescoreConfigurationError( - "`psm_id_pattern` could not be matched to all PSM spectrum IDs." + f"`psm_id_pattern` could not be extracted from PSM spectrum IDs (i.e. {old_id})." " Ensure that the regex contains a capturing group?" )