From 53d3af64d9e54c0ee0d6fb804e67aee80a31c698 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Fri, 12 Apr 2024 18:19:22 -0400 Subject: [PATCH] index draft (#482) --- .../concepts/location/SequenceLocation.rst | 0 .../molecular_variation/Adjacency.rst | 0 .../concepts/molecular_variation/Allele.rst | 0 .../molecular_variation/CisPhasedBlock.rst | 0 .../DerivativeSequence.rst | 0 .../molecular_variation/SequenceTerminus.rst | 0 .../concepts/molecular_variation/index.rst | 0 docs/source/concepts/other/Range.rst | 0 docs/source/concepts/other/Residue.rst | 0 docs/source/concepts/other/SequenceString.rst | 0 .../sequence_expression/LengthExpression.rst | 0 .../LiteralSequenceExpression | 0 .../ReferenceLengthExpression.rst | 0 .../concepts/sequence_expression/index.rst | 0 .../systemic_variation/CopyChange.rst | 0 .../concepts/systemic_variation/CopyCount.rst | 0 .../concepts/systemic_variation/index.rst | 0 .../computed_identifiers.rst | 0 .../{impl-guide => conventions}/example.rst | 0 .../{impl-guide => conventions}/index.rst | 0 .../normalization.rst | 0 .../required_data.rst | 0 docs/source/datatypes_classes/base_types.rst | 90 ---- docs/source/datatypes_classes/classes.rst | 437 ------------------ .../datatypes_classes/general_purpose.rst | 39 -- docs/source/datatypes_classes/index.rst | 31 -- docs/source/datatypes_classes/primitives.rst | 64 --- .../datatypes_classes/special_purpose.rst | 87 ---- docs/source/index.rst | 8 +- docs/source/profiles/index.rst | 1 + docs/source/quickstart.rst | 0 31 files changed, 5 insertions(+), 752 deletions(-) create mode 100644 docs/source/concepts/location/SequenceLocation.rst create mode 100644 docs/source/concepts/molecular_variation/Adjacency.rst create mode 100644 docs/source/concepts/molecular_variation/Allele.rst create mode 100644 docs/source/concepts/molecular_variation/CisPhasedBlock.rst create mode 100644 docs/source/concepts/molecular_variation/DerivativeSequence.rst create mode 100644 docs/source/concepts/molecular_variation/SequenceTerminus.rst create mode 100644 docs/source/concepts/molecular_variation/index.rst create mode 100644 docs/source/concepts/other/Range.rst create mode 100644 docs/source/concepts/other/Residue.rst create mode 100644 docs/source/concepts/other/SequenceString.rst create mode 100644 docs/source/concepts/sequence_expression/LengthExpression.rst create mode 100644 docs/source/concepts/sequence_expression/LiteralSequenceExpression create mode 100644 docs/source/concepts/sequence_expression/ReferenceLengthExpression.rst create mode 100644 docs/source/concepts/sequence_expression/index.rst create mode 100644 docs/source/concepts/systemic_variation/CopyChange.rst create mode 100644 docs/source/concepts/systemic_variation/CopyCount.rst create mode 100644 docs/source/concepts/systemic_variation/index.rst rename docs/source/{impl-guide => conventions}/computed_identifiers.rst (100%) rename docs/source/{impl-guide => conventions}/example.rst (100%) rename docs/source/{impl-guide => conventions}/index.rst (100%) rename docs/source/{impl-guide => conventions}/normalization.rst (100%) rename docs/source/{impl-guide => conventions}/required_data.rst (100%) delete mode 100644 docs/source/datatypes_classes/base_types.rst delete mode 100644 docs/source/datatypes_classes/classes.rst delete mode 100644 docs/source/datatypes_classes/general_purpose.rst delete mode 100644 docs/source/datatypes_classes/index.rst delete mode 100644 docs/source/datatypes_classes/primitives.rst delete mode 100644 docs/source/datatypes_classes/special_purpose.rst create mode 100644 docs/source/profiles/index.rst create mode 100644 docs/source/quickstart.rst diff --git a/docs/source/concepts/location/SequenceLocation.rst b/docs/source/concepts/location/SequenceLocation.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/molecular_variation/Adjacency.rst b/docs/source/concepts/molecular_variation/Adjacency.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/molecular_variation/Allele.rst b/docs/source/concepts/molecular_variation/Allele.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/molecular_variation/CisPhasedBlock.rst b/docs/source/concepts/molecular_variation/CisPhasedBlock.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/molecular_variation/DerivativeSequence.rst b/docs/source/concepts/molecular_variation/DerivativeSequence.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/molecular_variation/SequenceTerminus.rst b/docs/source/concepts/molecular_variation/SequenceTerminus.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/molecular_variation/index.rst b/docs/source/concepts/molecular_variation/index.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/other/Range.rst b/docs/source/concepts/other/Range.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/other/Residue.rst b/docs/source/concepts/other/Residue.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/other/SequenceString.rst b/docs/source/concepts/other/SequenceString.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/sequence_expression/LengthExpression.rst b/docs/source/concepts/sequence_expression/LengthExpression.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/sequence_expression/LiteralSequenceExpression b/docs/source/concepts/sequence_expression/LiteralSequenceExpression new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/sequence_expression/ReferenceLengthExpression.rst b/docs/source/concepts/sequence_expression/ReferenceLengthExpression.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/sequence_expression/index.rst b/docs/source/concepts/sequence_expression/index.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/systemic_variation/CopyChange.rst b/docs/source/concepts/systemic_variation/CopyChange.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/systemic_variation/CopyCount.rst b/docs/source/concepts/systemic_variation/CopyCount.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/concepts/systemic_variation/index.rst b/docs/source/concepts/systemic_variation/index.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/impl-guide/computed_identifiers.rst b/docs/source/conventions/computed_identifiers.rst similarity index 100% rename from docs/source/impl-guide/computed_identifiers.rst rename to docs/source/conventions/computed_identifiers.rst diff --git a/docs/source/impl-guide/example.rst b/docs/source/conventions/example.rst similarity index 100% rename from docs/source/impl-guide/example.rst rename to docs/source/conventions/example.rst diff --git a/docs/source/impl-guide/index.rst b/docs/source/conventions/index.rst similarity index 100% rename from docs/source/impl-guide/index.rst rename to docs/source/conventions/index.rst diff --git a/docs/source/impl-guide/normalization.rst b/docs/source/conventions/normalization.rst similarity index 100% rename from docs/source/impl-guide/normalization.rst rename to docs/source/conventions/normalization.rst diff --git a/docs/source/impl-guide/required_data.rst b/docs/source/conventions/required_data.rst similarity index 100% rename from docs/source/impl-guide/required_data.rst rename to docs/source/conventions/required_data.rst diff --git a/docs/source/datatypes_classes/base_types.rst b/docs/source/datatypes_classes/base_types.rst deleted file mode 100644 index ad2a4e6c..00000000 --- a/docs/source/datatypes_classes/base_types.rst +++ /dev/null @@ -1,90 +0,0 @@ -VRS Base Types -@@@@@@@@@@@@@@ - -The VRS Base Types are a set of classes that can be used to create -VRS data classes that can be used to represent variation and location -data. It uses core elements of the GKS Common as a foundation. - -.. _Entity: - -.. include:: ../defs/gks.common/Entity.rst - -Entity -###### - -.. _ValueObject: - -ValueObject -########### - -.. include:: ../defs/vrs/ValueObject.rst - -Ga4ghIdentifiableObject -####################### - -.. include:: ../defs/vrs/Ga4ghIdentifiableObject.rst - -.. _Location: - -Location -######## - -As used by biologists, the precision of "location" (or "locus") varies -widely, ranging from precise start and end numerical coordinates -defining a Location, to bounded regions of a sequence, to conceptual -references to named genomic features (e.g., chromosomal bands, genes, -exons) as proxies for the Locations on an implied reference sequence. - -The most common and concrete Location is a :ref:`SequenceLocation`, i.e., -a Location based on a named sequence and an Interval on that sequence. Other -types of Location may be added based on community need. - -.. include:: ../defs/vrs/Location.rst - -**Implementation Guidance** - -* Location refers to a position. Although it MAY imply a sequence, - the two concepts are not interchangeable, especially when the - location is non-specific (e.g., specified one or more ambiguous endpoints). - To represent a sequence derived from a Location, see - :ref:`SequenceExpression`. - - -.. _Variation: - -Variation -######### - -In the genetics community, variation is often used to mean *sequence* -variation, describing the differences observed in DNA or AA bases among -individuals, and typically with respect to a common reference sequence. - -In VRS, the Variation class is the conceptual root of all types of biomolecular -variation, and the *Variation* abstract class is the top-level object in -the :ref:`vr-schema-diagram`. Variation types are broadly categorized as -:ref:`MolecularVariation`, :ref:`SystemicVariation`, or a :ref:`utility -subclass `. Types of variation are widely varied, and -there are several :ref:`planned-variation` currently under consideration -to capture this diversity. - -.. include:: ../defs/vrs/Variation.rst - -.. _MolecularVariation: - -Molecular Variation -$$$$$$$$$$$$$$$$$$$ - -.. include:: ../defs/vrs/MolecularVariation.rst - -.. _SystemicVariation: - -Systemic Variation -$$$$$$$$$$$$$$$$$$ - -.. include:: ../defs/vrs/SystemicVariation.rst - - -CopyNumber -########## - -.. include:: ../defs/vrs/CopyNumber.rst \ No newline at end of file diff --git a/docs/source/datatypes_classes/classes.rst b/docs/source/datatypes_classes/classes.rst deleted file mode 100644 index 027d9f0a..00000000 --- a/docs/source/datatypes_classes/classes.rst +++ /dev/null @@ -1,437 +0,0 @@ -Classes -@@@@@@@ - -.. _SequenceLocation: - -SequenceLocation -$$$$$$$$$$$$$$$$ - -A *Sequence Location* is a specified subsequence of a reference :ref:`Sequence`. -The reference is typically a chromosome, transcript, or protein sequence. - -.. include:: ../defs/vrs/SequenceLocation.rst - -**Implementation Guidance** - -* For a :ref:`Sequence` of length *n*: - * 0 ≤ *interval.start* ≤ *interval.end* ≤ *n* - * inter-residue coordinate 0 refers to the point before the start of the Sequence - * inter-residue coordinate n refers to the point after the end of the Sequence. -* Coordinates MUST refer to a valid Sequence. VRS does not support - referring to intronic positions within a transcript sequence, - extrapolations beyond the ends of sequences, or other implied - sequence. - -.. important:: HGVS permits variants that refer to non-existent - sequence. Examples include coordinates extrapolated - beyond the bounds of a transcript and intronic - sequence. Such variants are not representable using VRS - and MUST be projected to a genomic reference in order - to be represented. - -**Examples** - -.. parsed-literal:: - - { - "interval": { - "end": 44908822, - "start": 44908821, - "type": "SimpleInterval" - }, - "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", - "type": "SequenceLocation" - } - -.. _Allele: - -Allele -$$$$$$ - -.. note:: The terms *allele* and *variant* are often used interchangeably, - although this use may mask subtle distinctions made by some users. - Specifically, while *allele* connotes a specific sequence state, - *variant* connotes a **change** between states. - - This distinction makes it awkward to use *variant* to represent an - unchanged (refrence-agreement) state at a Sequence Location. This was - a primary factor for choosing to use *allele* over *variant* - when designing VRS. Read more about this design decision: Using - :ref:`allele-not-variant`. - -An allele may refer to a number of alternative forms of the same gene or same -genetic locus. In the genetics community, *allele* may also refer to a -specific haplotype. In the context of biological sequences, "allele" refers -to a distinct state of a molecule at a location. - -.. include:: ../defs/vrs/Allele.rst - -**Implementation Guidance** - -* The :ref:`SequenceExpression` and :ref:`Location` - subclasses respectively represent diverse kinds of - sequence changes and mechanisms for describing the locations of - those changes, including varying levels of precision of sequence - location and categories of sequence changes. -* Implementations MUST enforce values interval.end ≤ sequence_length - when the Sequence length is known. -* Alleles are equal only if the component fields are equal: at the - same location and with the same state. -* Alleles MAY have multiple related representations on the same - Sequence type due to normalization differences. -* Implementations SHOULD normalize Alleles using :ref:`fully-justified - normalization ` whenever possible to facilitate - comparisons of variation in regions of representational ambiguity. -* Implementations SHOULD preferentially represent Alleles using - :ref:`LiteralSequenceExpression`, however there are cases where use - of other :ref:`SequenceExpression` classes is most appropriate; see - :ref:`using-sequence-expressions` for guidance. -* When the alternate Sequence is the same length as the interval, the - lengths of the reference Sequence and imputed Sequence are the - same. (Here, imputed sequence means the sequence derived by applying - the Allele to the reference sequence.) When the replacement Sequence - is shorter than the length of the interval, the imputed Sequence is - shorter than the reference Sequence, and conversely for replacements - that are larger than the interval. -* When the state is a :ref:`LiteralSequenceExpression` of ``""`` (the empty - string), the Allele refers to a deletion at this location. -* The Allele entity is based on Sequence and is intended to be used - for intragenic and extragenic variation. Alleles are not explicitly - associated with genes or other features. -* Biologically, referring to Alleles is typically meaningful only in - the context of empirical alternatives. For modelling purposes, - Alleles MAY exist as a result of biological observation or - computational simulation, i.e., virtual Alleles. -* "Single, contiguous" refers the representation of the Allele, not - the biological mechanism by which it was created. For instance, two - non-adjacent single residue Alleles could be represented by a single - contiguous multi-residue Allele. -* When a trait has a known genetic basis, it is typically represented - computationally as an association with an Allele. -* This specification's definition of Allele applies to any - :ref:`Location`, including locations on RNA or protein - :ref:`Sequence`. - -**Examples** - -An Allele correponding to rs7412 C>T on GRCh38: - -.. parsed-literal:: - - { - "location": { - "interval": { - "end": { - "type": "Number", - "value": 44908822 - }, - "start": { - "type": "Number", - "value": 44908821 - }, - "type": "SequenceInterval" - }, - "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", - "type": "SequenceLocation" - }, - "state": { - "sequence": "T", - "type": "SequenceState" - }, - "type": "Allele" - } - - -**Sources** - -* `ISOGG: Allele `__ — An allele is one - of two or more forms of the DNA sequence of a particular gene. -* `SequenceOntology: allele (SO:0001023) - `__ - — An allele is one of a set of coexisting sequence variants of a - gene. -* `SequenceOntology: sequence_alteration (SO:0001059) - `__ - — A sequence_alteration is a sequence_feature whose extent is the - deviation from another sequence. -* `SequenceOntology: sequence_variant (SO:0001060) - `__ - — A sequence_variant is a non exact copy of a sequence_feature or - genome exhibiting one or more sequence_alteration. -* `Wikipedia: Allele `__ — One - of a number of alternative forms of the same gene or same genetic - locus. -* `GenotypeOntology: Allele (GENO:0000512) - `__ - A sequence - feature representing one of a set of coexisting sequences at a - particular genomic locus. An allele can represent a 'reference' or - 'variant' sequence at a locus. - - -.. _Haplotype: - -Haplotype -$$$$$$$$$ - -Haplotypes are a specific combination of Alleles that are *in-cis*: occurring -on the same physical molecule. Haplotypes are commonly described with respect -to locations on a gene, a set of nearby genes, or other physically proximal -genetic markers that tend to be transmitted together. - -.. include:: ../defs/vrs/Haplotype.rst - -**Implementation Guidance** - -* Haplotypes are an assertion of Alleles known to occur "in cis" or - "in phase" with each other. -* All Alleles in a Haplotype MUST be defined on the same reference - sequence or chromosome. -* Alleles within a Haplotype MUST not overlap ("overlap" is defined in - Interval). -* The locations of Alleles within the Haplotype MUST be interpreted - independently. Alleles that create a net insertion or deletion of - sequence MUST NOT change the location of "downstream" Alleles. -* The `members` attribute is required and MUST contain at least two - Alleles. - -**Sources** - -* `ISOGG: Haplotype `__ — A haplotype - is a combination of alleles (DNA sequences) at different places - (loci) on the chromosome that are transmitted together. A haplotype - may be one locus, several loci, or an entire chromosome depending on - the number of recombination events that have occurred between a - given set of loci. -* `SequenceOntology: haplotype (SO:0001024) - `__ - — A haplotype is one of a set of coexisting sequence variants of a - haplotype block. -* `GENO: Haplotype (GENO:0000871) - `__ - - A set of two or more sequence alterations on the same chromosomal - strand that tend to be transmitted together. - -**Examples** - -An APOE ε2 Haplotype with inline Alleles: - -.. parsed-literal:: - - { - "members": [ - { - "location": { - "interval": { - "end": { - "type": "Number", - "value": 44908822 - }, - "start": { - "type": "Number", - "value": 44908821 - }, - "type": "SequenceInterval" - }, - "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", - "type": "SequenceLocation" - }, - "state": { - "sequence": "C", - "type": "LiteralSequenceExpression" - }, - "type": "Allele" - }, - { - "location": { - "interval": { - "end": { - "type": "Number", - "value": 44908684 - }, - "start": { - "type": "Number", - "value": 44908683 - }, - "type": "SequenceInterval" - }, - "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", - "type": "SequenceLocation" - }, - "state": { - "sequence": "C", - "type": "LiteralSequenceExpression" - }, - "type": "Allele" - } - ], - "type": "Haplotype" - } - -The same APOE ε2 Haplotype with referenced Alleles: - -.. parsed-literal:: - - { - "members": [ - "ga4gh:VA.-kUJh47Pu24Y3Wdsk1rXEDKsXWNY-68x", - "ga4gh:VA.Z_rYRxpUvwqCLsCBO3YLl70o2uf9_Op1" - ], - "type": "Haplotype" - } - -The GA4GH computed identifier for these Haplotypes is -``ga4gh:VH.i8owCOBHIlRCPtcw_WzRFNTunwJRy99-``, regardless of whether the -Variation objects are inlined or referenced, and regardless of -order. See :ref:`computed-identifiers` for more information. - - -.. _CopyNumber: -.. _CopyNumberCount: - -CopyNumberCount -$$$$$$$$$$$$$$$ - -*Copy Number Count* captures the integral copies of a molecule within a -genome. Copy Number Count has conflated meanings in the -genomics community, and can mean either (or both) the notion of copy -number *in a genome* or copy number *on a molecule*. VRS separates -the concerns of these two types of statements; this concept is a type -of :ref:`SystemicVariation` and so describes the number of copies in a -genome. The related :ref:`MolecularVariation` concept can be expressed -as an :ref:`Allele` with a :ref:`RepeatedSequenceExpression`. - -.. include:: ../defs/vrs/CopyNumberCount.rst - -**Examples** - -Two, three, or four total copies of BRCA1: - -.. parsed-literal:: - - { - "copies": { - "comparator": ">=", - "type": "IndefiniteRange", - "value": 3 - }, - "subject": { - "gene_id": "ncbigene:348", - "type": "Gene" - }, - "type": "CopyNumberCount" - } - -.. _CopyNumberChange: - -CopyNumberChange -$$$$$$$$$$$$$$$$ - -*Copy Number Change* captures a categorization of copies -of a molecule within a system, relative to a baseline. These types -of Variation are common outputs from CNV callers, particularly in the -somatic domain where integral :ref:`CopyNumberCount` are difficult to -estimate and less useful in practice than relative statements. Somatic CNV -callers typically express changes as relative statements, and many HGVS -expressions submitted to express copy number variation are interpreted to be -relative copy changes. - -.. include:: ../defs/vrs/CopyNumberChange.rst - -**Examples** - -Low-level copy gain of BRCA1: - -.. parsed-literal:: - - { - "copy_change": "efo:0030071", # low-level gain - "subject": { - "gene_id": "ncbigene:348", # BRCA1 gene - "type": "Gene" - }, - "type": "CopyNumberChange" - } - -.. _genotype: - -Genotype -$$$$$$$$ - -A *genotype* is a representation of the variants present at a given genomic locus, and may be referred -to either by individual nucleotide representations (e.g. GT representation in VCF files) or symbolically -(e.g. A/B/O blood type reporting). To support these use cases, VRS genotypes enable representation of -genotypes using either :ref:`Allele` objects (as commonly done in VCF records) or larger :ref:`Haplotype` -objects (which would otherwise be represented using symbolic shorthand). - -.. include:: ../defs/vrs/Genotype.rst - -**Implementation guidance** - -* Haplotypes or Alleles in :ref:`GenotypeMember` objects MAY occur at different locations or on - different reference sequences. For example, an individual may have haplotypes on two - population-specific references. - -**Notes** - -* The term "genotype" has two, related definitions in common use. The - narrower definition is a set of alleles observed at a single - location and often with a ploidy of two, such as a pair of single residue - variants on an autosome. The broader, generalized definition is a - set of alleles at multiple locations and/or with ploidy other than - two. VRS Genotype entity is based on this broader definition. -* The term "diplotype" is often used to refer to two in-trans haplotypes at a locus. - VRS Genotype entity subsumes the conventional definition of diplotype, though - it describes no explicit in-trans phase relationship. Therefore, - VRS does not include an explicit entity for diplotypes. See :ref:`this note - ` for a discussion. -* VRS makes no assumptions about ploidy of an organism or individual nor any - polysomy affecting a locus. The `genotype.count` attribute explicitly captures the total - count of molecules associated with a genomic locus represented by the Genotype. -* In diploid organisms, there are typically two instances of each autosomal chromosome, - and therefore two instances of sequence at a particular locus. Thus, Genotypes will - often list two GenotypeMembers each based on a distinct Haplotype or Allele. In the case - of haploid chromosomes or haploinsufficiency, the Genotype consists of a single GenotypeMember. -* A specific (heterozygous) diplotype SHOULD be represented as a Genotype of two GenotypeMember - instances each containing a constituent :ref:`Haplotype`. A homozygous diplotype SHOULD be - represented as a Genotype of one constituent GenotypeMember (with `GenotypeMember.count=2`). -* A consequence of the computational definition is that in-cis Haplotypes at overlapping or - adjacent intervals MUST be merged into a single Haplotype for the same Genotype. -* A `GenotypeMember.variation` value MUST be unique among Genotype Members within a Genotype. - When more than one Genotype Member would have the same `variation` value (e.g. in the case - of a homozygous variant), this would be represented as a Genotype Value with a corresponding - `count` (i.e. for a diploid homozygous variant, `GenotypeMember.count = 2`). -* The rationale for permitting Genotypes with Haplotypes defined on different reference - sequences is to enable the accurate representation of segments of DNA with the most - appropriate population-specific reference sequence. -* Deletion of sequence at locus would be represented by the presence of Alleles of deleted - sequence, not absence of Alleles; therefore Genotypes MAY NOT have count < 1. - -**Sources** - -SO: `Genotype (SO:0001027) -`__ -— A genotype is a variant genome, complete or incomplete. - -.. _genotypes-represent-haplotypes-with-arbitrary-ploidy: - -.. note:: - VRS defines Genotypes using a list of GenotypeMembers defined by - Haplotypes or Alleles. In essence, Haplotypes and Genotypes represent - two distinct dimensions of containment: Haplotypes represent the "in - phase" relationship of Alleles while Genotypes represents sets of - Haplotypes of arbitrary ploidy. - - There are two important consequences of these definitions: There is no - single-location Genotype. Users of SNP data will be familiar with - representations like rs7412 C/C, which indicates the diploid state at - a position. In VRS, this is merely a special case of a - Genotype with one GenotypeMember, defined by a single Allele with - two copies. VRS does not define a diplotype class. A diplotype - is a special case of a VRS Genotype with count = 2. In practice, software - data types that assume a ploidy of 2 make it very difficult to represent haploid - states, copy number loss, and copy number gain, all of which occur - when representing human data. In addition, inferred ploidy = 2 makes - software incompatible with organisms with other ploidy. VRS - requires explicit definition of the count of molecules associated with - a genomic locus using the `count` attribute, though this count may be inexact - (e.g. a :ref:`DefiniteRange` or :ref:`IndefiniteRange`). \ No newline at end of file diff --git a/docs/source/datatypes_classes/general_purpose.rst b/docs/source/datatypes_classes/general_purpose.rst deleted file mode 100644 index 55e76fcd..00000000 --- a/docs/source/datatypes_classes/general_purpose.rst +++ /dev/null @@ -1,39 +0,0 @@ -General Purpose Types -@@@@@@@@@@@@@@@@@@@@@ - -TBD describe and pull the gks-common general puprose datatypes used by vrs - -Range -##### - -.. include:: ../defs/vrs/Range.rst - -Expression -########## - -.. include:: ../defs/vrs/Expression.rst - -Code -#### - -.. include:: ../defs/gks.common/Code.rst - -Coding -###### - -.. include:: ../defs/gks.common/Coding.rst - -Mapping -####### - -.. include:: ../defs/gks.common/Mapping.rst - -MappableEntity -############## - -.. include:: ../defs/gks.common/MappableEntity.rst - -Extension -######### - -.. include:: ../defs/gks.common/Extension.rst diff --git a/docs/source/datatypes_classes/index.rst b/docs/source/datatypes_classes/index.rst deleted file mode 100644 index 44454638..00000000 --- a/docs/source/datatypes_classes/index.rst +++ /dev/null @@ -1,31 +0,0 @@ -Datatypes & Classes -@@@@@@@@@@@@@@@@@@@ - -The VRS specification defines a set of datatypes that are used for the data class elements. There are four categories of datatypes: - -1. The base abstract types that provide the foundation for all types. -2. Simple / primitive types, which are single elements with a primitive value. -3. General-purpose complex types, which are re-usable clusters of elements. -4. Special purpose datatypes - defined elsewhere in the specification for specific usages. - -The VRS data classes are the identifiable entities that are used to represent biological entities. These are defined in the :ref:`data_classes` section. - -These are the identifiable classes in scope for VRS: - -* SequenceLocation -* Allele -* Haplotype -* CopyNumberCount -* CopyNumberChange -* Genotype - -.. toctree:: - :maxdepth: 2 - :includehidden: - - base_types - primitives - general_purpose - special_purpose - classes - \ No newline at end of file diff --git a/docs/source/datatypes_classes/primitives.rst b/docs/source/datatypes_classes/primitives.rst deleted file mode 100644 index c806a745..00000000 --- a/docs/source/datatypes_classes/primitives.rst +++ /dev/null @@ -1,64 +0,0 @@ -Primitive Types -@@@@@@@@@@@@@@@ - -Primitive types represent simple values with syntactic or other -constraints. They enable correctness for values stored in VRS. - -.. _IRI: - -IRI -### - -.. include:: ../defs/gks.common/IRI.rst - -.. _Residue: - -Residue -####### - -A residue refers to a specific `monomer`_ within the `polymeric -chain`_ of a `protein`_ or `nucleic acid`_ (Source: `Wikipedia -Residue page`_). - -.. include:: ../defs/vrs/Residue.rst - -.. _SequenceString: - -SequenceString -############## - -A *sequence* is a character string representation of a contiguous, -linear polymer of nucleic acid or amino acid :ref:`Residues `. -Sequences are the prevalent representation of these polymers, -particularly in the domain of variant representation. - -.. include:: ../defs/vrs/SequenceString.rst - -**Information Model** - -A string constrained to match the regular expression ``^[A-Z*\-]*$``, -derived from the IUPAC one-letter nucleic acid and amino acid codes. - -**Implementation Guidance** - -* Sequences MAY be empty (zero-length) strings. Empty sequences are used as the - replacement Sequence for deletion Alleles. -* Sequences MUST consist of only uppercase IUPAC abbreviations, including ambiguity codes. -* A Sequence provides a stable coordinate system by which an :ref:`Allele` MAY be located and - interpreted. -* A Sequence MAY have several roles. A "reference sequence" is any Sequence used - to define an :ref:`Allele`. A Sequence that replaces another Sequence is - called a "replacement sequence". -* In some contexts outside VRS, "reference sequence" may refer - to a member of set of sequences that comprise a genome assembly. In VRS - specification, any sequence may be a "reference sequence", including those in - a genome assembly. -* For the purposes of representing sequence variation, it is not - necessary that Sequences be explicitly "typed" (i.e., DNA, RNA, or - AA). - -**Examples** - -.. parsed-literal:: - - "ACGT" (string) diff --git a/docs/source/datatypes_classes/special_purpose.rst b/docs/source/datatypes_classes/special_purpose.rst deleted file mode 100644 index fc939067..00000000 --- a/docs/source/datatypes_classes/special_purpose.rst +++ /dev/null @@ -1,87 +0,0 @@ -Special Purpose Types -@@@@@@@@@@@@@@@@@@@@@ - -.. _SequenceExpression: - -Sequence Expressions -#################### - -VRS provides several syntaxes for expressing a sequence, -collectively referred to as *Sequence Expressions*. They are: - -* :ref:`LiteralSequenceExpression`: An explicit :ref:`Sequence`. -* :ref:`ReferenceLengthExpression`: TBD -* :ref:`LengthExpression`: - -Some SequenceExpression instances may appear to resolve to the same -sequence, but are intended to be semantically distinct. There MAY be -reasons to select or enforce one form over another that SHOULD be -managed by implementations. See discussion on :ref:`equivalence`. - -.. include:: ../defs/vrs/SequenceExpression.rst - -.. _ReferenceLengthExpression: - -ReferenceLengthExpression -$$$$$$$$$$$$$$$$$$$$$$$$$ - -A ReferenceLengthExpression ... - -.. include:: ../defs/vrs/ReferenceLengthExpression.rst - -**Examples** - -.. parsed-literal:: - - tbd - -.. _LengthExpression: - -LengthExpression -$$$$$$$$$$$$$$$$ - -A LengthExpression ... - -.. include:: ../defs/vrs/LengthExpression.rst - -**Examples** - -.. parsed-literal:: - - tbd - -.. _LiteralSequenceExpression: - -LiteralSequenceExpression -$$$$$$$$$$$$$$$$$$$$$$$$$ - -A LiteralSequenceExpression "wraps" a string representation of a -sequence for parallelism with other SequenceExpressions. - -.. include:: ../defs/vrs/LiteralSequenceExpression.rst - -**Examples** - -.. parsed-literal:: - - { - "sequence": "ACGT", - "type": "LiteralSequenceExpression" - } - - -.. _SequenceReference: - -Sequence Reference -################## - -tbd - -.. include:: ../defs/vrs/SequenceReference.rst - -.. _genotypemember: - -GenotypeMember -############## - -.. include:: ../defs/vrs/GenotypeMember.rst \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 382afc82..a5b08a30 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,9 +21,9 @@ implementation that promotes adoption of the standard. :includehidden: introduction - terms_and_model - datatypes_classes/index - schema - impl-guide/index + quickstart + concepts/index + conventions/index + profiles/index releases/index appendices/index diff --git a/docs/source/profiles/index.rst b/docs/source/profiles/index.rst new file mode 100644 index 00000000..7ee1f819 --- /dev/null +++ b/docs/source/profiles/index.rst @@ -0,0 +1 @@ +This is a placeholder for the profiles index. \ No newline at end of file diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst new file mode 100644 index 00000000..e69de29b