Skip to content

Commit

Permalink
Allow for sites with insertion codes
Browse files Browse the repository at this point in the history
  • Loading branch information
WillHannon-MCB committed Feb 13, 2024
1 parent 25cd069 commit 6de982b
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 28 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,23 @@

### Removed

- N/A


## [1.2.0] - 2024-02-13

### Added

- N/A

### Changed

- It's now possible to include `protein_sites` that contain insertion codes (e.g. 52A, 214B, etc.). Previously, `protein_sites` could only be numeric, and non-numeric sites were ignored by `dms-viz`.

### Deprecated

- N/A

### Removed

- N/A
26 changes: 7 additions & 19 deletions configure_dms_viz/configure_dms_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,13 @@ def format_sitemap_data(sitemap_df, mut_metric_df, included_chains):
"""Check that the sitemap data is in the correct format.
This data should be a pandas.DataFrame with the following columns:
- reference_site: The site number in the reference sequence that
- reference_site: (numeric or string) The site number in the reference sequence that
corresponds to the reference site in the mutation dataframe
- sequential_site: The order of the site in the protein sequence and
- sequential_site: (numeric) The order of the site in the protein sequence and
on the x-axis of the visualization
- protein_site: [Optional] The site number in the protein structure if
different from the reference site
- protein_site: [Optional] (numeric or string) The site number in the protein structure if
different from the reference site. This can include insertion codes and
therefore can be a string.
Parameters
----------
Expand Down Expand Up @@ -182,9 +183,7 @@ def is_numeric(value):
message="\n'protein_site' column is not present in the sitemap. Assuming that the reference sites correspond to protein sites.\n",
fg="yellow",
)
sitemap_df["protein_site"] = sitemap_df["reference_site"].apply(
lambda y: y if is_numeric(y) else ""
)
sitemap_df["protein_site"] = sitemap_df["reference_site"].apply(lambda y: y)
# Check how many of the protein sites are thrown out
num_empty_protein_sites = (sitemap_df["protein_site"] == "").sum()
if num_empty_protein_sites > 0.10 * len(sitemap_df):
Expand All @@ -193,22 +192,11 @@ def is_numeric(value):
fg="red",
)

else:
# Make sure that the provided protein column has no invalid values
if (
not sitemap_df["protein_site"]
.apply(lambda y: y == "" or is_numeric(y))
.all()
):
raise ValueError(
"The protein_site column of the sitemap contains invalid values that cannot be coerced into a numeric form."
)

# If the sitemap doesn't already have a column for chains, add it
if "chains" not in sitemap_df.columns:
# Add the included chains to the sitemap dataframe if there are any
sitemap_df["chains"] = sitemap_df["protein_site"].apply(
lambda y: included_chains if is_numeric(y) else ""
lambda y: included_chains
)

# Drop the columns that aren't needed for the visualization
Expand Down
13 changes: 11 additions & 2 deletions configure_dms_viz/pdb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,19 @@ def check_wildtype_residues(structure, mut_metric_df, sitemap_df, excluded_chain
total_sites += 1
matches_wildtype_at_site = []
site_not_in_structure = []
# Convert the structure object to a dictionary to include insertion codes in the residue id
structure_dict = {
chain: {
(str(residue.id[1]) + residue.id[2]).strip(): seq1(residue.resname)
for residue in structure[0][chain]
if residue.id[0] == " "
}
for chain in chains
}
for chain in chains:
try:
residue = structure[0][chain][int(site)]
if seq1(residue.resname) == wildtype.upper():
residue = structure_dict[chain][str(site)]
if residue == wildtype.upper():
matches_wildtype_at_site.append(True)
else:
matches_wildtype_at_site.append(False)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "configure-dms-viz"
version = "1.1.1"
version = "1.2.0"
description = "Configure your data for visualization with dms-viz.github.io"
authors = ["Will Hannon <[email protected]>"]
license = "MIT"
Expand Down

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/IAV-PB1-DMS/output/IAV-PB1-DMS.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/SARS2-RBD-REGN-DMS/output/REGN mAb Cocktail.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/SARS2-RBD-REGN-DMS/output/SARS2-RBD-REGN-DMS.json

Large diffs are not rendered by default.

0 comments on commit 6de982b

Please sign in to comment.