diff --git a/maintenance/ensembl.py b/maintenance/ensembl.py index 9ea3b1e76..4ff4ec417 100644 --- a/maintenance/ensembl.py +++ b/maintenance/ensembl.py @@ -9,6 +9,9 @@ import urllib.request +logger = logging.getLogger("ensembl") + + class EnsemblRestClient: """ A client for the Ensembl REST API. Based on the example code @@ -46,7 +49,7 @@ def _sleep_if_needed(self): if self.num_requests >= self.max_requests_per_second: delta = time.time() - self.last_request_time if delta < 1: - logging.debug("Rate limiting REST API") + logger.info("Rate limiting REST API") time.sleep(1 - delta) self.num_requests = 0 else: @@ -61,9 +64,10 @@ def get(self, endpoint, headers=None, params=None): """ self._sleep_if_needed() request = self._make_request(endpoint, headers, params) - logging.debug("making request to %s", request.full_url) + logger.info("making request to %s", request.full_url) response = urllib.request.urlopen(request) content = response.read() + logger.debug("Response: %s", content) data = json.loads(content) return data @@ -78,6 +82,13 @@ def get_release(self): assert len(releases) == 1 return releases[0] + def get_species_data(self, ensembl_id): + """ + Returns species information for the specified ensembl_id. + """ + output = self.get(endpoint=f"/info/genomes/{ensembl_id}") + return output + def get_genome_data(self, ensembl_id): """ Returns the genome data for the specified Ensembl species diff --git a/maintenance/main.py b/maintenance/main.py index 92e13e23b..95638f422 100644 --- a/maintenance/main.py +++ b/maintenance/main.py @@ -7,49 +7,82 @@ import shutil import string import pathlib +import logging import click import black +import daiquiri import stdpopsim from . import ensembl +logger = logging.getLogger("maint") + species_template = string.Template( """ import stdpopsim from . 
import genome_data -_chromosomes = [] -for name, data in genome_data.data["chromosomes"].items(): - _chromosomes.append( - stdpopsim.Chromosome( - id=name, - length=data["length"], - synonyms=data["synonyms"], - mutation_rate=0, # FILL ME IN - recombination_rate=0, # FILL ME IN - ) - ) - -_genome = stdpopsim.Genome( - chromosomes=_chromosomes, - mutation_rate_citations=[], # ADD CITATIONS - recombination_rate_citations=[], # ADD CITATIONS - assembly_name=genome_data.data["assembly_name"], - assembly_accession=genome_data.data["assembly_accession"], - assembly_citations=[], +# [The following are notes for implementers and should be deleted +# once the recombination rates have been inserted] +# This is the per-chromosome recombination rate, typically the mean +# rate along the chromosome. +# Values in this dictionary are set to -1 by default, so you have +# to update each one. These should be derived from the most reliable +# data and how they were obtained should be documented here. +# The appropriate citation must be added to the list of +# recombination_rate_citations in the Genome object. + +_recombination_rate = $chromosome_rate_dict + +# [The following are notes for implementers and should be deleted +# once the mutation rates have been inserted] +# This is the per-chromosome mutation rate, typically the mean +# rate along the chromosome. If per chromosome rates are not available, +# the same value should be used for each chromosome. In this case, +# please use a variable to store this value, rather than repeating +# the same numerical constant, e.g. +# _mutation_rate = { +# 1: _overall_rate, +# 2: _overall_rate, +# ... +# Values in this dictionary are set to -1 by default, so you have +# to update each one. These should be derived from the most reliable +# data and how they were obtained should be documented here. +# The appropriate citation must be added to the list of +# mutation_rate_citations in the Genome object. 
+ +_mutation_rate = $chromosome_rate_dict + +_genome = stdpopsim.Genome.from_data( + genome_data.data, + recombination_rate=_recombination_rate, + mutation_rate=_mutation_rate ) +# [Implementers: you must add citations for the values that are +# provided. Do this like: +# _genome.recombination_rate_citations.append( +# stdpopsim.Citation(author=x, year=y, doi=z)) +# _genome.mutation_rate_citations.append( +# stdpopsim.Citation(author=x, year=y, doi=z))] + _species = stdpopsim.Species( id="$sps_id", - name="FIXME", - common_name="FIXME", + ensembl_id="$ensembl_id", + name="$scientific_name", + common_name="$common_name", genome=_genome, - generation_time=0, # FIXME - generation_time_citations=[], - population_size=0, # FIXME + # [Implementers: you must provide an estimate of the generation_time. + # Please also add a citation for this.] + generation_time=0, + # [Implementers: you must provide an estimate of the population size. + # TODO: give a definition of what this should be. + # Please also add a citation for this.] + population_size=0, population_size_citations=[], + generation_time_citations=[], ) stdpopsim.register_species(_species) @@ -58,20 +91,54 @@ species_test_template = string.Template( """ +import pytest + import stdpopsim from tests import test_species -class TestSpecies(test_species.SpeciesTestBase): +class TestSpeciesData(test_species.SpeciesTestBase): species = stdpopsim.get_species("$sps_id") - # TODO specific tests for species data. + def test_ensembl_id(self): + assert self.species.ensembl_id == "$ensembl_id" -class TestGenome(test_species.GenomeTestBase): + def test_name(self): + assert self.species.name == "$scientific_name" + + def test_common_name(self): + assert self.species.common_name == "$common_name" + + # QC Tests. These tests are performed by another contributor + # independently referring to the citations provided in the + # species definition, filling in the appropriate values + # and deleting the pytest "skip" annotations. 
+ @pytest.mark.skip("Population size QC not done yet") + def test_qc_population_size(self): + assert self.species.population_size == -1 + + @pytest.mark.skip("Generation time QC not done yet") + def test_qc_generation_time(self): + assert self.species.generation_time == -1 + +class TestGenomeData(test_species.GenomeTestBase): genome = stdpopsim.get_species("$sps_id").genome + @pytest.mark.skip("Recombination rate QC not done yet") + @pytest.mark.parametrize( + ["name", "rate"], + $chromosome_rate_dict.items()) + def test_recombination_rate(self, name, rate): + assert rate == pytest.approx(self.genome.get_chromosome(name).recombination_rate) + + @pytest.mark.skip("Mutation rate QC not done yet") + @pytest.mark.parametrize( + ["name", "rate"], + $chromosome_rate_dict.items()) + def test_mutation_rate(self, name, rate): + assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate) """ ) @@ -92,7 +159,7 @@ def catalog_path(sps_id): return pathlib.Path(f"stdpopsim/catalog/{sps_id}") -def write_catalog_stub(path, sps_id, ensembl_id): +def write_catalog_stub(*, path, sps_id, ensembl_id, species_data, genome_data): """ Writes stub files to the catalog for a new species. """ @@ -102,15 +169,32 @@ def write_catalog_stub(path, sps_id, ensembl_id): print('"""', file=f) print("from . 
import species # noqa: F401", file=f) - species_code = species_template.substitute(sps_id=sps_id) + scientific_name = species_data["scientific_name"] + common_name = species_data["display_name"] + logger.info(f"{sps_id}: name={scientific_name}, common_name={common_name}") + chr_names = genome_data["chromosomes"].keys() + chromosome_rate_template = {name: -1 for name in chr_names} + species_code = species_template.substitute( + ensembl_id=ensembl_id, + sps_id=sps_id, + scientific_name=scientific_name, + common_name=common_name, + chromosome_rate_dict=chromosome_rate_template, + ) path = path / "species.py" - click.echo(f"Writing species definition stub to {path}") + logger.info(f"Writing species definition stub to {path}") with open(path, "w") as f: f.write(black_format(species_code)) - test_code = species_test_template.substitute(sps_id=sps_id) + test_code = species_test_template.substitute( + ensembl_id=ensembl_id, + sps_id=sps_id, + scientific_name=scientific_name, + common_name=common_name, + chromosome_rate_dict=chromosome_rate_template, + ) test_path = pathlib.Path("tests") / f"test_{sps_id}.py" - click.echo(f"Writing species test stub to {test_path}") + logger.info(f"Writing species test stub to {test_path}") with open(test_path, "w") as f: f.write(black_format(test_code)) @@ -132,13 +216,20 @@ def write(self, path): def add_species(self, ensembl_id, force=False): sps_id = ensembl_stdpopsim_id(ensembl_id) - click.echo(f"Adding new species {sps_id} for Ensembl ID {ensembl_id}") + logger.info(f"Adding new species {sps_id} for Ensembl ID {ensembl_id}") root = catalog_path(sps_id) if force: shutil.rmtree(root, ignore_errors=True) root.mkdir() - self.write_genome_data(ensembl_id) - write_catalog_stub(root, sps_id, ensembl_id) + genome_data = self.write_genome_data(ensembl_id) + species_data = self.ensembl_client.get_species_data(ensembl_id) + write_catalog_stub( + path=root, + sps_id=sps_id, + ensembl_id=ensembl_id, + species_data=species_data, + 
genome_data=genome_data, + ) def write_genome_data(self, ensembl_id): sps_id = ensembl_stdpopsim_id(ensembl_id) @@ -147,7 +238,7 @@ def write_genome_data(self, ensembl_id): raise ValueError( f"Directory {id} corresponding to {ensembl_id} does" + "not exist" ) - click.echo(f"Writing genome data for {sps_id} {ensembl_id}") + logger.info(f"Writing genome data for {sps_id} {ensembl_id}") path = path / "genome_data.py" data = self.ensembl_client.get_genome_data(ensembl_id) code = f"data = {data}" @@ -155,10 +246,11 @@ def write_genome_data(self, ensembl_id): # Format the code with Black so we don't get noisy diffs with self.write(path) as f: f.write(black_format(code)) + return data def write_ensembl_release(self): release = self.ensembl_client.get_release() - click.echo(f"Using Ensembl release {release}") + logger.info(f"Using Ensembl release {release}") path = pathlib.Path("stdpopsim/catalog/ensembl_info.py") code = f"release = {release}" with self.write(path) as f: @@ -171,8 +263,16 @@ def write_ensembl_release(self): @click.group() -def cli(): - pass +@click.option("--debug", is_flag=True) +def cli(debug): + log_level = logging.INFO + if debug: + log_level = logging.DEBUG + daiquiri.setup(level=log_level) + + # Black's logging is very noisy + black_logger = logging.getLogger("blib2to3") + black_logger.setLevel(logging.CRITICAL) @cli.command() @@ -215,7 +315,7 @@ def add_species(ensembl_id, force): Add a new species to the catalog using its ensembl ID. 
""" writer = DataWriter() - writer.add_species(ensembl_id, force=force) + writer.add_species(ensembl_id.lower(), force=force) writer.write_ensembl_release() diff --git a/stdpopsim/catalog/AraTha/species.py b/stdpopsim/catalog/AraTha/species.py index 561ac28dc..9f1b98d8c 100644 --- a/stdpopsim/catalog/AraTha/species.py +++ b/stdpopsim/catalog/AraTha/species.py @@ -31,7 +31,7 @@ mutation_rate_citations=[ stdpopsim.Citation( author="Ossowski et al.", - year="2010", + year=2010, doi="https://doi.org/10.1126/science.1180677", reasons={stdpopsim.CiteReason.MUT_RATE}, ) @@ -39,7 +39,7 @@ recombination_rate_citations=[ stdpopsim.Citation( author="Huber et al.", - year="2014", + year=2014, doi="https://doi.org/10.1093/molbev/msu247", reasons={stdpopsim.CiteReason.REC_RATE}, ) @@ -47,7 +47,7 @@ assembly_citations=[ stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gkm965", - year="2007", + year=2007, author="Swarbreck et al.", reasons={stdpopsim.CiteReason.ASSEMBLY}, ) @@ -56,6 +56,7 @@ _species = stdpopsim.Species( id="AraTha", + ensembl_id="arabidopsis_thaliana", name="Arabidopsis thaliana", common_name="A. 
thaliana", genome=_genome, @@ -63,7 +64,7 @@ generation_time_citations=[ stdpopsim.Citation( doi="https://doi.org/10.1890/0012-9658(2002)083[1006:GTINSO]2.0.CO;2", - year="2002", + year=2002, author="Donohue", reasons={stdpopsim.CiteReason.GEN_TIME}, ) @@ -72,7 +73,7 @@ population_size_citations=[ stdpopsim.Citation( doi="https://doi.org/10.1016/j.cell.2016.05.063", - year="2016", + year=2016, author="1001GenomesConsortium", reasons={stdpopsim.CiteReason.POP_SIZE}, ) diff --git a/stdpopsim/catalog/BosTau/species.py b/stdpopsim/catalog/BosTau/species.py index 609de15b5..8f342885b 100644 --- a/stdpopsim/catalog/BosTau/species.py +++ b/stdpopsim/catalog/BosTau/species.py @@ -75,6 +75,7 @@ _species = stdpopsim.Species( id="BosTau", + ensembl_id="bos_taurus", name="Bos Taurus", common_name="Cattle", genome=_genome, diff --git a/stdpopsim/catalog/CanFam/species.py b/stdpopsim/catalog/CanFam/species.py index ddea23514..4128b7400 100644 --- a/stdpopsim/catalog/CanFam/species.py +++ b/stdpopsim/catalog/CanFam/species.py @@ -105,6 +105,7 @@ _species = stdpopsim.Species( id="CanFam", + ensembl_id="canis_familiaris", name="Canis familiaris", common_name="Dog", genome=_genome, diff --git a/stdpopsim/catalog/DroMel/species.py b/stdpopsim/catalog/DroMel/species.py index ecc501397..8aa818e79 100644 --- a/stdpopsim/catalog/DroMel/species.py +++ b/stdpopsim/catalog/DroMel/species.py @@ -3,11 +3,6 @@ import stdpopsim from . 
import genome_data -########################################################### -# -# Genome definition -# -########################################################### _LiAndStephan = stdpopsim.Citation( author="Li et al.", year=2006, doi="https://doi.org/10.1371/journal.pgen.0020166" @@ -21,7 +16,7 @@ _DosSantosEtAl = stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gku1099", - year="2015", + year=2015, author="dos Santos et al.", reasons={stdpopsim.CiteReason.ASSEMBLY}, ) @@ -56,6 +51,7 @@ _species = stdpopsim.Species( id="DroMel", + ensembl_id="drosophila_melanogaster", name="Drosophila melanogaster", common_name="D. melanogaster", genome=_genome, diff --git a/stdpopsim/catalog/EscCol/species.py b/stdpopsim/catalog/EscCol/species.py index 91e14108d..261a19d53 100644 --- a/stdpopsim/catalog/EscCol/species.py +++ b/stdpopsim/catalog/EscCol/species.py @@ -1,29 +1,25 @@ import stdpopsim from . import genome_data -########################################################### -# -# Genome definition -# -########################################################### - _hartl_et_al = stdpopsim.Citation( author="Hartl, Moriyama, and Sawyer", - year="1994", + year=1994, # doesn't have a doi doi="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1206133/", ) _sezonov_et_al = stdpopsim.Citation( - author="Sezonov et al.", year="2007", doi="https://doi.org/10.1128/JB.01368-07" + author="Sezonov et al.", year=2007, doi="https://doi.org/10.1128/JB.01368-07" ) _wielgoss_et_al = stdpopsim.Citation( - author="Wielgoss et al.", year="2011", doi="https://doi.org/10.1534/g3.111.000406" + author="Wielgoss et al.", year=2011, doi="https://doi.org/10.1534/g3.111.000406" ) _blattner_et_al = stdpopsim.Citation( - author="Blattner et al.", year="1997", doi="10.1126/science.277.5331.1453" + author="Blattner et al.", + year=1997, + doi="https://doi.org/10.1126/science.277.5331.1453", ) _chromosomes = [] diff --git a/stdpopsim/catalog/HomSap/species.py b/stdpopsim/catalog/HomSap/species.py index 
23cade97e..f8d4f5007 100644 --- a/stdpopsim/catalog/HomSap/species.py +++ b/stdpopsim/catalog/HomSap/species.py @@ -33,7 +33,7 @@ _genome2001 = stdpopsim.Citation( doi="http://dx.doi.org/10.1038/35057062", - year="2001", + year=2001, author="International Human Genome Sequencing Consortium", reasons={stdpopsim.CiteReason.ASSEMBLY}, ) @@ -46,21 +46,21 @@ _takahata1993 = stdpopsim.Citation( doi="https://doi.org/10.1093/oxfordjournals.molbev.a039995", - year="1993", + year=1993, author="Takahata", reasons={stdpopsim.CiteReason.POP_SIZE}, ) _tian2019 = stdpopsim.Citation( doi="https://doi.org/10.1016/j.ajhg.2019.09.012", - year="2019", + year=2019, author="Tian, Browning, and Browning", reasons={stdpopsim.CiteReason.MUT_RATE}, ) _tremblay2000 = stdpopsim.Citation( doi="https://doi.org/10.1086/302770", - year="2000", + year=2000, author="Tremblay and Vézina", reasons={stdpopsim.CiteReason.GEN_TIME}, ) @@ -88,6 +88,7 @@ _species = stdpopsim.Species( id="HomSap", + ensembl_id="homo_sapiens", name="Homo sapiens", common_name="Human", genome=_genome, diff --git a/stdpopsim/catalog/PonAbe/species.py b/stdpopsim/catalog/PonAbe/species.py index 3c80bc528..b754fc460 100644 --- a/stdpopsim/catalog/PonAbe/species.py +++ b/stdpopsim/catalog/PonAbe/species.py @@ -64,6 +64,7 @@ _species = stdpopsim.Species( id="PonAbe", + ensembl_id="pongo_abelii", name="Pongo abelii", common_name="Sumatran orangutan", genome=_genome, diff --git a/stdpopsim/citations.py b/stdpopsim/citations.py index 244a44f86..28da77810 100644 --- a/stdpopsim/citations.py +++ b/stdpopsim/citations.py @@ -9,6 +9,9 @@ import attr +# TODO this should be an enum.Enum + + class CiteReason: ENGINE = "simulation engine" DEM_MODEL = "demographic model" @@ -75,6 +78,23 @@ def merge(citations): cset[citation.doi] = citation return list(cset.values()) + def assert_valid(self): + """ + Checks that this citation is valid by checking the types and values + of the instance variables. 
+ """ + assert isinstance(self.author, str) + assert len(self.author) > 0 + assert isinstance(self.doi, str) + assert len(self.doi) > 0 + parsed = urllib.parse.urlparse(self.doi) + assert parsed.scheme.startswith("http") + assert isinstance(self.year, int) + assert self.year > 0 + for reason in self.reasons: + assert isinstance(reason, str) + # TODO check that it's in the set of accepted reasons. + def fetch_bibtex(self): """Retrieve the bibtex of a citation from Crossref.""" req = urllib.request.Request(self.doi) diff --git a/stdpopsim/genomes.py b/stdpopsim/genomes.py index 15186a275..14c1529fd 100644 --- a/stdpopsim/genomes.py +++ b/stdpopsim/genomes.py @@ -25,13 +25,45 @@ class Genome: """ chromosomes = attr.ib(factory=list) - mutation_rate_citations = attr.ib(factory=list, kw_only=True) - recombination_rate_citations = attr.ib(factory=list, kw_only=True) - assembly_citations = attr.ib(factory=list, kw_only=True) assembly_name = attr.ib(default=None, kw_only=True) assembly_accession = attr.ib(default=None, kw_only=True) length = attr.ib(default=0, init=False) + # TODO these should all be combined into a single "citations" attr, + # since we already have a "reason" attribute in Citation. + mutation_rate_citations = attr.ib(factory=list, kw_only=True) + recombination_rate_citations = attr.ib(factory=list, kw_only=True) + assembly_citations = attr.ib(factory=list, kw_only=True) + + @staticmethod + def from_data(genome_data, *, recombination_rate, mutation_rate): + """ + Construct a Genome object from the specified dictionary of + genome information from Ensembl, recombination_rate and + mutation_rate dictionaries. + + This method is for internal use only. 
+ """ + chr_names = set(genome_data["chromosomes"].keys()) + assert set(recombination_rate.keys()) == chr_names + assert set(mutation_rate.keys()) == chr_names + chromosomes = [] + for name, data in genome_data["chromosomes"].items(): + chromosomes.append( + Chromosome( + id=name, + length=data["length"], + synonyms=data["synonyms"], + mutation_rate=mutation_rate[name], + recombination_rate=recombination_rate[name], + ) + ) + return Genome( + chromosomes=chromosomes, + assembly_name=genome_data["assembly_name"], + assembly_accession=genome_data["assembly_accession"], + ) + def __attrs_post_init__(self): for chromosome in self.chromosomes: self.length += chromosome.length diff --git a/stdpopsim/species.py b/stdpopsim/species.py index ea0516323..745045713 100644 --- a/stdpopsim/species.py +++ b/stdpopsim/species.py @@ -121,11 +121,8 @@ class Species: :ivar demographic_models: This list of :class:`DemographicModel` instances in the catalog for this species. :vartype demographic_models: list - :ivar ensembl_id: The ensembl id for the species' genome assembly, - which will be used by maintenance scripts to query ensembl's database. - This parameter will be automatically populated from the species name, - and should not be set directly unless a non-default assembly is used - for the species definition (e.g. see E. coli). + :ivar ensembl_id: The ensembl id for the species which is used by + maintenance scripts to query ensembl's database. 
:vartype ensembl_id: str """ @@ -133,26 +130,22 @@ class Species: name = attr.ib(type=str, kw_only=True) common_name = attr.ib(type=str, kw_only=True) genome = attr.ib(type=int, kw_only=True) - generation_time = attr.ib(default=1, kw_only=True) - generation_time_citations = attr.ib(factory=list, kw_only=True) - population_size = attr.ib(default=1, kw_only=True) - population_size_citations = attr.ib(factory=list, kw_only=True) + generation_time = attr.ib(default=0, kw_only=True) + population_size = attr.ib(default=0, kw_only=True) demographic_models = attr.ib(factory=list, kw_only=True) ensembl_id = attr.ib(type=str, kw_only=True) + + # TODO these should be combined into a single "citations" list. + # We have a citation.reasons argument which should be sufficient. + generation_time_citations = attr.ib(factory=list, kw_only=True) + population_size_citations = attr.ib(factory=list, kw_only=True) + # A list of genetic maps. This is undocumented as the parameter is not # intended to be used when the Species is initialsed. # Use add_genetic_map() instead. genetic_maps = attr.ib(factory=list, kw_only=True) annotations = attr.ib(factory=list, kw_only=True) - @ensembl_id.default - def _default_ensembl_id(self): - """ - Returns the ID of this species for the Ensembl REST API. - This is the species name, underscore delimited and in lowercase. - """ - return self.name.lower().replace(" ", "_") - def get_contig( self, chromosome=None, diff --git a/tests/test_HomSap.py b/tests/test_HomSap.py index 2e769bf8b..2a19ec8a6 100644 --- a/tests/test_HomSap.py +++ b/tests/test_HomSap.py @@ -25,6 +25,9 @@ def test_basic_attributes(self): @pytest.mark.parametrize("chr_id", [chrom.id for chrom in genome.chromosomes]) def test_recombination_rates(self, chr_id): + # We should recast this test and just hard code in the values. + # Tests should be *obvious* not clever. 
+ # recompute recombination rates from HapMapII_GRCh37 map then # compare the results to the current recombination rates for each chromosome genetic_map = "HapMapII_GRCh37" diff --git a/tests/test_annotations.py b/tests/test_annotations.py index c7ef20b6d..2d35cffa2 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -64,6 +64,7 @@ def __init__(self): genome = stdpopsim.Genome(chromosomes=[]) _species = stdpopsim.Species( id="TesSpe", + ensembl_id="test_species", name="Test species", common_name="Testy McTestface", genome=genome, diff --git a/tests/test_genetic_maps.py b/tests/test_genetic_maps.py index c17b6b5ec..87e5cb808 100644 --- a/tests/test_genetic_maps.py +++ b/tests/test_genetic_maps.py @@ -63,6 +63,7 @@ def __init__(self): genome = stdpopsim.Genome(chromosomes=[]) _species = stdpopsim.Species( id="TesSpe", + ensembl_id="test_species", name="Test species", common_name="Testy McTestface", genome=genome, diff --git a/tests/test_species.py b/tests/test_species.py index 7b3e246aa..76fee5f16 100644 --- a/tests/test_species.py +++ b/tests/test_species.py @@ -104,14 +104,38 @@ def test_id(self): assert isinstance(self.species.id, str) assert utils.is_valid_species_id(self.species.id) - def test_name(self): + def test_name_basics(self): assert isinstance(self.species.name, str) assert utils.is_valid_species_name(self.species.name) - def test_common_name(self): + def test_common_name_basics(self): assert isinstance(self.species.common_name, str) assert utils.is_valid_species_common_name(self.species.name) + def test_genome_type(self): + assert isinstance(self.species.genome, stdpopsim.Genome) + + def test_demographic_model_types(self): + assert isinstance(self.species.demographic_models, list) + for model in self.species.demographic_models: + assert isinstance(model, stdpopsim.DemographicModel) + + def test_citation_properties(self): + all_citations = ( + self.species.generation_time_citations + + self.species.population_size_citations + ) + 
for citation in all_citations: + # Test some basic stuff about the citations. + assert isinstance(citation, stdpopsim.Citation) + citation.assert_valid() + + def test_generation_time_defined(self): + assert self.species.generation_time > 0 + + def test_population_size_defined(self): + assert self.species.population_size > 0 + class GenomeTestBase: """ @@ -153,6 +177,30 @@ def test_mean_mutation_rate(self): assert mean_genome_mr >= lowest_mr assert highest_mr >= mean_genome_mr + def test_chromosomes(self): + assert len(self.genome.chromosomes) > 0 + for chrom in self.genome.chromosomes: + assert isinstance(chrom, stdpopsim.Chromosome) + + def test_mutation_rates_set(self): + for chrom in self.genome.chromosomes: + assert chrom.mutation_rate >= 0 + + def test_recombination_rates_set(self): + for chrom in self.genome.chromosomes: + assert chrom.recombination_rate >= 0 + + def test_citation_properties(self): + all_citations = ( + self.genome.mutation_rate_citations + + self.genome.recombination_rate_citations + + self.genome.assembly_citations + ) + for citation in all_citations: + # Test some basic stuff about the citations. + assert isinstance(citation, stdpopsim.Citation) + citation.assert_valid() + class TestAllGenomes(unittest.TestCase): """