half-entered new dfe

got proportion added dom coefficients almost finished added lethal mutations Update dfes.py add uniform DFE (closes #1481) h-s relationship (closes #1488) formatting test dominance coeffs actually in there remove deprecated set-output (see https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/ ) and bump cache package version bumps remove python 3.7 as it is end-of-life and creates install problems for scikit-allel make sure the tests are testing what they're supposed to test chris comments changelog clarify manual more chris comments bump cache?!?! using homsap mu using homsap mu flake8 delete stray print different neutral prop Update stdpopsim/catalog/HomSap/dfes.py Co-authored-by: Peter Ralph <[email protected]> Update stdpopsim/catalog/HomSap/dfes.py Co-authored-by: Peter Ralph <[email protected]> Update stdpopsim/catalog/HomSap/dfes.py Co-authored-by: Peter Ralph <[email protected]> updates to new dfe Apply suggestions from code review reduce cache num again fixxxxxxxxxxxxxxxxxxxxxxxxx bump cache trying changeing tests.yml trying changeing tests.yml reduce cache num again
popsim-consortium · Aug 18, 2023 · a27d371 · a27d371
1 parent 5f319a5
commit a27d371
Show file tree

Hide file tree

Showing 8 changed files with 546 additions and 94 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -18,9 +18,8 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-20.04, macos-11, windows-latest]
-        python: [3.7, "3.10"]
+        python: [3.8, "3.10"]
     env:
-      CONDA_ENV_NAME: stdpopsim
       OS: ${{ matrix.os }}
       PYTHON: ${{ matrix.python }}
 
@@ -36,7 +35,7 @@ jobs:
       - name: find conda
         id: find-conda
         run: |
-          echo "::set-output name=CONDA::$CONDA"
+          echo "name=CONDA::$CONDA" >> $GITHUB_OUTPUT
 
       - name: fix conda permissions
         if: runner.os == 'macOS'
@@ -49,7 +48,7 @@ jobs:
         uses: actions/cache@v2
         env:
           # Increase this to reset the cache if the key hasn't changed.
-          CACHE_NUM: 2
+          CACHE_NUM: 4
         with:
           path: |
             ${{ steps.find-conda.outputs.CONDA }}/envs/${{ env.CONDA_ENV_NAME }}
@@ -60,7 +59,7 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         if: steps.cache.outputs.cache-hit != 'true'
         with:
-          activate-environment: ${{ env.CONDA_ENV_NAME }}
+          activate-environment: stdpopsim
           python-version: ${{ matrix.python }}
           channels: conda-forge
           channel-priority: strict

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,22 @@
 [0.2.1] - 2023-XX-XX
 --------------------
 
+**Breaking changes**:
+
+- To add the possibility for a relationship between dominance and selection
+    coefficient, now each stdpopsim MutationType might have more than one
+    underlying SLiM mutation type; so, where this is recorded in top-level
+    metadata (under `ts.metadata["stdpopsim"]["DFEs"]`) is now a list
+    instead of a single value. This will not affect anyone who is not
+    parsing the metadata related to DFEs.
+
+**New features**:
+
+- *Relationship between dominance and selection coefficient:*
+    Added the `dominance_coeff_list` argument to `MutationType`, allowing
+    for DFEs with a discretized relationship between h and s.
+    (:user:`petrelharp`, :pr:`1498`)
+
 **New species**:
 
 - Mus musculus (:user:`peterdfields`, :pr:`1437`).

diff --git a/requirements/CI/requirements.txt b/requirements/CI/requirements.txt
@@ -1,10 +1,10 @@
 coverage==6.3.3
 flake8==5.0.4
-pytest==7.2.1
-pytest-cov==4.0.0
-pytest-xdist==3.2.0
+pytest==7.3.2
+pytest-cov==4.1.0
+pytest-xdist==3.3.1
 setuptools-scm==7.1.0
-sphinx==4.3.2
+sphinx==5.0.2
 sphinx-argparse==0.4.0
 sphinx-issues==3.0.1
 sphinx_rtd_theme==1.2.0
@@ -14,11 +14,9 @@ attrs==21.4.0
 appdirs==1.4.4
 humanize==4.6.0
 pyslim==1.0.1
-numpy==1.21.5; python_version=='3.7'
-numpy==1.22.3; python_version>='3.8'
-scipy; python_version=='3.7'
-scipy==1.10.1; python_version>='3.8'
-scikit-allel==1.3.5
+numpy==1.22.3
+scipy==1.10.1
+scikit-allel==1.3.6
 biopython==1.80
 demes==0.2.2
 demesdraw==0.3.1

diff --git a/stdpopsim/catalog/HomSap/dfes.py b/stdpopsim/catalog/HomSap/dfes.py
@@ -147,3 +147,56 @@ def _HuberLogNormalDFE():
 
 
 _species.add_dfe(_HuberLogNormalDFE())
+
+
+def _KyriazisDFE():
+    id = "Mixed_K23"
+    description = "Deleterious Gamma DFE with additional lethals"
+    long_description = """
+    The DFE estimated from human data recommended in Kyriazis et al.
+    (2023), https://doi.org/10.1086/726736, for general use.
+    This model is similar to the Kim et al. (2017) DFE based on human
+    genetic data, modified to include the dominance distribution from
+    Henn et al. (2016).
+    The model is also augmented with an additional proportion of 0.3% of
+    recessive lethals, based on the analysis of Wade et al. (2023).
+    """
+    citations = [
+        stdpopsim.Citation(
+            author="Kyriazis et al.",
+            year=2023,
+            doi="https://doi.org/10.1086/726736",
+            reasons={stdpopsim.CiteReason.DFE},
+        )
+    ]
+    neutral = stdpopsim.MutationType()
+    gamma_mean = -0.0131
+    gamma_shape = 0.186
+    coefs = [0.45, 0.2, 0.05, 0]
+    breaks = [0.001, 0.01, 0.1]
+    gamma = stdpopsim.MutationType(
+        dominance_coeff_list=coefs,
+        dominance_coeff_breaks=breaks,
+        distribution_type="g",  # gamma distribution
+        distribution_args=[gamma_mean, gamma_shape],
+    )
+    lethal = stdpopsim.MutationType(
+        distribution_type="f",  # fixed value
+        distribution_args=[-1],  # fitness in SLiM for homozygotes is multiiplied by 1+s
+        dominance_coeff=0,
+    )
+    proportion_deleterious = 2.31 / (1 + 2.31)
+    lethal_prop = proportion_deleterious * 0.003  # 0.3% lethals
+    gamma_prop = proportion_deleterious - lethal_prop
+    neutral_prop = 1 - proportion_deleterious
+    return stdpopsim.DFE(
+        id=id,
+        description=description,
+        long_description=long_description,
+        mutation_types=[neutral, gamma, lethal],
+        proportions=[neutral_prop, gamma_prop, lethal_prop],
+        citations=citations,
+    )
+
+
+_species.add_dfe(_KyriazisDFE())
diff --git a/stdpopsim/dfe.py b/stdpopsim/dfe.py
@@ -8,11 +8,17 @@
 import numpy as np
 
 
+def _copy_converter(x):
+    if isinstance(x, list):
+        x = x.copy()
+    return x
+
+
 @attr.s(kw_only=True)
 class MutationType(object):
     """
-    Class representing a "type" of mutation.  The design closely (exactly)
-    mirrors SLiM's MutationType class.
+    Class representing a "type" of mutation.  The design closely mirrors SLiM's
+    MutationType class.
 
     The main thing that mutation types carry is a way of drawing a selection
     coefficient for each new mutation. This ``distribution_type`` should be one
@@ -23,6 +29,7 @@ class MutationType(object):
     - ``g``: gamma, two parameters (mean, shape)
     - ``n``: normal, two parameters (mean, SD)
     - ``w``: Weibull, two parameters (scale, shape)
+    - ``u``: Uniform, two parameters (min, max)
     - ``lp``: positive logNormal, two parameters (mean and sd on log scale; see rlnorm)
     - ``ln``: negative logNormal, two parameters (mean and sd on log scale; see rlnorm)
 
@@ -31,28 +38,99 @@ class MutationType(object):
     exponential and gamma, a negative mean can be provided, obtaining always
     negative values.
 
+    Instead of a single dominance coefficient (which would be specified with
+    `dominance_coeff`), a discretized relationship between dominance and
+    selection coefficient can be implemented: if dominance_coeff_list is
+    provided, mutations with selection coefficient s for which
+    dominance_coeff_breaks[k-1] <= s <= dominance_coeff_breaks[k] will have
+    dominance coefficient dominance_coeff[k]. In other words, the first entry
+    of dominance_coeff_list applies to any mutations with selection coefficient
+    below the first entry of dominance_coeff_breaks; the second entry of
+    dominance_coeff_list applies to mutations with selection coefficient
+    between the first and second entries of dominance_coeff_breaks, and so
+    forth. The list of breaks must therefore be of length one less than the
+    list of dominance coefficients.
+
     :ivar distribution_type: A one-letter abbreviation for the distribution of
         fitness effects that each new mutation of this type draws from (see below).
     :vartype distribution_type: str
     :ivar distribution_args: Arguments for the distribution type.
     :vartype distribution_type: list
-    :ivar dominance_coeff: A single dominance coefficient (negative = underdominance,
+    :ivar dominance_coeff: The dominance coefficient (negative = underdominance,
         0 = recessive, 0.5 = additive, 1.0 = completely dominant, > 1.0 = overdominant)
+        Default: 0.5.
     :vartype dominance_coeff: float
     :ivar convert_to_substitution: Whether to retain any fixed mutations in the
         simulation: if not, we cannot ask about their frequency once fixed.
         (Either way, they will remain in the tree sequence).
     :vartype convert_to_substitution: bool
+    :ivar dominance_coeff_list: Either None (the default) or a list of floats describing
+        a list of dominance coefficients, to apply to different selection coefficients
+        (see details). Cannot be specified along with dominance_coeff.
+    :vartype dominance_coeff_list: list of floats
+    :ivar dominance_coeff_breaks: Either None (the default) or a list of floats
+        describing the intervals of selection coefficient over which each of the entries
+        of dominance_coeff_list applies (see details). Must be of length one shorter than
+        dominance_coeff_list.
+    :vartype dominance_coeff_breaks: list of floats
     """
 
-    dominance_coeff = attr.ib(default=0.5, type=float)
+    dominance_coeff = attr.ib(default=None, type=float)
     distribution_type = attr.ib(default="f", type=str)
-    distribution_args = attr.ib(factory=lambda: [0], type=list)
+    distribution_args = attr.ib(
+        factory=lambda: [0], type=list, converter=_copy_converter
+    )
     convert_to_substitution = attr.ib(default=True, type=bool)
+    dominance_coeff_list = attr.ib(default=None, type=list, converter=_copy_converter)
+    dominance_coeff_breaks = attr.ib(default=None, type=list, converter=_copy_converter)
 
     def __attrs_post_init__(self):
-        if not isinstance(self.dominance_coeff, (float, int)):
-            raise ValueError("dominance_coeff must be a number.")
+        if self.dominance_coeff is None and self.dominance_coeff_list is None:
+            self.dominance_coeff = 0.5
+
+        if self.dominance_coeff is not None:
+            if (self.dominance_coeff_list is not None) or (
+                self.dominance_coeff_breaks is not None
+            ):
+                raise ValueError(
+                    "Cannot specify both dominance_coeff and dominance_coeff_list."
+                )
+            if not isinstance(self.dominance_coeff, (float, int)):
+                raise ValueError("dominance_coeff must be a number.")
+            if not np.isfinite(self.dominance_coeff):
+                raise ValueError(
+                    f"Invalid dominance coefficient {self.dominance_coeff}."
+                )
+
+        if self.dominance_coeff_list is not None:
+            # disallow the inefficient and annoying length-one case
+            if len(self.dominance_coeff_list) < 2:
+                raise ValueError("dominance_coeff_list must have at least 2 elements.")
+            for h in self.dominance_coeff_list:
+                if not isinstance(h, (float, int)):
+                    raise ValueError("dominance_coeff_list must be a list of numbers.")
+                if not np.isfinite(h):
+                    raise ValueError(f"Invalid dominance coefficient {h}.")
+            if self.dominance_coeff_breaks is None:
+                raise ValueError(
+                    "A list of dominance coefficients provided but no breaks."
+                )
+            if len(self.dominance_coeff_list) != len(self.dominance_coeff_breaks) + 1:
+                raise ValueError(
+                    "len(dominance_coeff_list) must be equal "
+                    "to len(dominance_coeff_breaks) + 1"
+                )
+            lb = -1 * np.inf
+            for b in self.dominance_coeff_breaks:
+                if not isinstance(b, (float, int)):
+                    raise ValueError(
+                        "dominance_coeff_breaks must be a list of numbers."
+                    )
+                if not np.isfinite(b):
+                    raise ValueError(f"Invalid dominance coefficient break {b}.")
+                if b < lb:
+                    raise ValueError("dominance_coeff_breaks must be nondecreasing.")
+                lb = b
 
         if not isinstance(self.distribution_type, str):
             raise ValueError("distribution_type must be str.")
@@ -69,9 +147,6 @@ def __attrs_post_init__(self):
         if not isinstance(self.convert_to_substitution, bool):
             raise ValueError("convert_to_substitution must be bool.")
 
-        if not np.isfinite(self.dominance_coeff):
-            raise ValueError(f"Invalid dominance coefficient {self.dominance_coeff}.")
-
         # To add a new distribution type: validate the
         # distribution_args here, and add unit tests.
         if self.distribution_type == "f":
@@ -143,6 +218,19 @@ def __attrs_post_init__(self):
                 f"return {sign}rlnorm(1, {logmean} + log(Q), {logsd});"
             ]
             self.distribution_type = "s"
+        elif self.distribution_type == "u":
+            # Uniform
+            if (
+                len(self.distribution_args) != 2
+                or self.distribution_args[0] > self.distribution_args[1]
+            ):
+                raise ValueError(
+                    "Uniformly-distributed sel. coefs. (distribution_type='u') "
+                    "use a (min, max) parameterisation, with min <= max."
+                )
+            umin, umax = self.distribution_args
+            self.distribution_args = [f"return runif(1, Q * {umin}, Q * {umax});"]
+            self.distribution_type = "s"
         else:
             raise ValueError(
                 f"{self.distribution_type} is not a supported distribution type."