diff --git a/docs/src/conf.py b/docs/src/conf.py
index a2670453b..16a2fc335 100644
--- a/docs/src/conf.py
+++ b/docs/src/conf.py
@@ -113,3 +113,35 @@ def prose_list(items):
'auspice': ('https://docs.nextstrain.org/projects/auspice/en/stable', None),
'snakemake': ('https://snakemake.readthedocs.io/en/stable', None),
}
+
+# -- Linkchecking ------------------------------------------------------------
+
+linkcheck_ignore = [
+ # Links to localhost appear for explanatory purposes only; they
+ # can never resolve when the linkchecker runs.
+ r'^http://127\.0\.0\.1:\d+',
+ r'^http://localhost:\d+',
+ # The top-level bucket link returns a 404.
+ r'^https://data\.nextstrain\.org$',
+ # These sites block the client, probably as an anti-scraping measure.
+ r'^https://czgenepi\.org/resources',
+ r'^https://science\.sciencemag\.org/content/early/2020/06/05/science\.abb9263',
+ # This link is correct, but the lack of a top-level dataset means
+ # it 404s initially. The point of the link is to show the
+ # community page, so allow it to fail here.
+ r'^https://nextstrain\.org/community/ESR-NZ/GenomicsNarrativeSARSCoV2$',
+]
+linkcheck_anchors_ignore_for_url = [
+ # GitHub uses anchor-looking fragments to highlight lines but
+ # resolves them with JavaScript, so skip anchor checks for
+ # GitHub URLs:
+ r'^https://github\.com',
+ # You need to be logged in to see the anchor (and it looks like
+ # Terra uses it for redirecting more than for anchoring, too…)
+ r'^https://app\.terra\.bio/',
+ # The client is blocked, but the links themselves work.
+ r'^https://www\.science\.org/doi/10\.1126/science\.abb9263',
+ # The linkchecker doesn't support text fragments, and we link to
+ # one anchored to this page.
+ r'^https://en\.wikipedia\.org/wiki/Consensus_sequence',
+]
diff --git a/docs/src/guides/data-prep/gisaid-search.rst b/docs/src/guides/data-prep/gisaid-search.rst
index 690fe5e80..d7e47b277 100644
--- a/docs/src/guides/data-prep/gisaid-search.rst
+++ b/docs/src/guides/data-prep/gisaid-search.rst
@@ -9,7 +9,7 @@ The following instructions describe how to curate data for a region-specific ana
Login to GISAID
---------------
-Navigate to `GISAID (gisaid.org) `__ and select the “Login” link.
+Navigate to `GISAID (gisaid.org) `__ and select the “Login” link.
.. figure:: ../../images/gisaid-homepage.png
:alt: GISAID homepage with login link
diff --git a/docs/src/guides/update-workflow.rst b/docs/src/guides/update-workflow.rst
index 27365acbe..6ef3bb9cb 100644
--- a/docs/src/guides/update-workflow.rst
+++ b/docs/src/guides/update-workflow.rst
@@ -5,7 +5,7 @@ We update the official workflow regularly with:
- `curated metadata including latitudes/longitudes, clade annotations, and low quality sequences `__
- bug fixes
-- `new features <../reference/change_log>`__
+- :doc:`new features <../reference/change_log>`
Update your local copy of the workflow, to benefit from these changes.
@@ -21,7 +21,7 @@ Update your local copy of the workflow, to benefit from these changes.
# and then replay your local changes on top of those incoming changes.
git pull --rebase origin master
-Alternately, download a specific version of the workflow that you know works for you. We create new `releases of the workflow `__ any time we introduce breaking changes, so you can choose when to update based on `what has changed <../reference/change_log>`__.
+Alternately, download a specific version of the workflow that you know works for you. We create new `releases of the workflow `__ any time we introduce breaking changes, so you can choose when to update based on :doc:`what has changed <../reference/change_log>`.
.. code:: bash
diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md
index 08c030d3c..e7095c833 100644
--- a/docs/src/reference/change_log.md
+++ b/docs/src/reference/change_log.md
@@ -107,7 +107,7 @@ We also use this change log to document new features that maintain backward comp
## New features since last version update
- - 11 August 2021: Add support for "Sequences" and "Patient status metadata" downloads from GISAID's search interface including [documentation in the tutorial of how to use these data](../guides/data-prep.html#curate-data-from-gisaid-search-and-downloads). ([#701](https://github.com/nextstrain/ncov/pull/701))
+ - 11 August 2021: Add support for "Sequences" and "Patient status metadata" downloads from GISAID's search interface including [documentation in the tutorial of how to use these data](https://docs.nextstrain.org/projects/ncov/en/latest/guides/data-prep/gisaid-search.html). ([#701](https://github.com/nextstrain/ncov/pull/701))
- 6 August 2021: We've replaced the mechanisms that support remote file inputs (e.g. `s3://` URLs) to improve internal workflow structure, extend support to `gs://`, `http://`, and `https://` URLs, and expand support for compressed inputs.
Our [remote file inputs documentation](remote_inputs) is updated to reflect the changes.
@@ -128,7 +128,7 @@ We also use this change log to document new features that maintain backward comp
## v7 (27 May 2021)
-For more details about this release, see [the configuration reference for the new "sanitize metadata" parameters](configuration.html#sanitize_metadata) and [the corresponding pull request](https://github.com/nextstrain/ncov/pull/640).
+For more details about this release, see [the configuration reference for the new "sanitize metadata" parameters](https://docs.nextstrain.org/projects/ncov/en/latest/reference/workflow-config-file.html#sanitize-metadata) and [the corresponding pull request](https://github.com/nextstrain/ncov/pull/640).
### Major changes
@@ -141,7 +141,7 @@ For more details about this release, see [the configuration reference for the ne
## New features since last version update
- - 25 May 2021: Support custom Auspice JSON prefixes with a new configuration parameter, `auspice_json_prefix`. [See the configuration reference for more details](configuration.html#auspice_json_prefix). ([#643](https://github.com/nextstrain/ncov/pull/643))
+ - 25 May 2021: Support custom Auspice JSON prefixes with a new configuration parameter, `auspice_json_prefix`. [See the configuration reference for more details](https://docs.nextstrain.org/projects/ncov/en/latest/reference/workflow-config-file.html#auspice-json-prefix). ([#643](https://github.com/nextstrain/ncov/pull/643))
## v6 (20 May 2021)
@@ -161,7 +161,7 @@ For more details about this release, see [the configuration reference for the ne
### Major changes
-- Drop support for old sequence/metadata inputs. This change removes support for the `config["sequences"]` and `config["metadata"]` starting points for the workflow in favor of the more flexible [`config["inputs"]` format](configuration.html#inputs).
+- Drop support for old sequence/metadata inputs. This change removes support for the `config["sequences"]` and `config["metadata"]` starting points for the workflow in favor of the more flexible [`config["inputs"]` format](https://docs.nextstrain.org/projects/ncov/en/latest/reference/workflow-config-file.html#inputs).
- Use `nextalign` for alignment instead of `mafft`. This change completely removes support for `mafft` in favor of `nextalign`. Future versions may reinstate `mafft` support as part of `augur align` updates.
### Minor changes
@@ -188,7 +188,7 @@ For more details about this release, see [the configuration reference for the ne
## New features since last version update
-- 20 April 2021: Surface emerging lineage as a colorby. This replaces the rather stale color by "Emerging Clade" with a new color by "Emerging Lineage". This focuses on PANGO lineages that are of interest triangulated by [CoVariants](https://covariants.org/), [PANGO](https://cov-lineages.org/) international lineage reports, [CDC](https://www.cdc.gov/coronavirus/2019-ncov/cases-updates/variant-surveillance/variant-info.html) VUIs and VOCs and [PHE](https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/975742/Variants_of_Concern_VOC_Technical_Briefing_8_England.pdf) VUIs and VOCs. The intention is for the listing at `emerging_lineages.tsv` to be updated frequently with new lineages added and no longer interesting lineages dropped. [#609](https://github.com/nextstrain/ncov/pull/609)
+- 20 April 2021: Surface emerging lineage as a colorby. This replaces the rather stale color by "Emerging Clade" with a new color by "Emerging Lineage". This focuses on PANGO lineages that are of interest triangulated by [CoVariants](https://covariants.org/), [PANGO](https://cov-lineages.org/) international lineage reports, [CDC](https://www.cdc.gov/covid/php/variants/index.html) VUIs and VOCs and [PHE](https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/975742/Variants_of_Concern_VOC_Technical_Briefing_8_England.pdf) VUIs and VOCs. The intention is for the listing at `emerging_lineages.tsv` to be updated frequently with new lineages added and no longer interesting lineages dropped. [#609](https://github.com/nextstrain/ncov/pull/609)
- 12 April 2021: Calculate current clade frequency and logistic growth rate across nodes in the phylogeny. This produces a new `logistic_growth.json` file and uses this file to add a coloring the final Auspice JSON. Implementation choices are discussed in PR [#595](https://github.com/nextstrain/ncov/pull/595).
diff --git a/docs/src/reference/metadata-fields.rst b/docs/src/reference/metadata-fields.rst
index 0760b63a0..79ae98c68 100644
--- a/docs/src/reference/metadata-fields.rst
+++ b/docs/src/reference/metadata-fields.rst
@@ -21,7 +21,7 @@ Name of the pathogen.
Column 3: ``gisaid_epi_isl``
--------------------------------------
-If this genome is shared via `GISAID `__ then please include the EPI ISL here. In our example this is ``EPI_ISL_413490``.
+If this genome is shared via `GISAID `__ then please include the EPI ISL here. In our example this is ``EPI_ISL_413490``.
Column 4: ``genbank_accession``
--------------------------------------
@@ -112,12 +112,12 @@ Sex of the patient from whom the sample was collected. This will show up in ausp
Column 18: ``originating_lab``
--------------------------------------
-Please see `GISAID `__ for more information.
+Please see `GISAID `__ for more information.
Column 19: ``submitting_lab``
--------------------------------------
-Please see `GISAID `__ for more information.
+Please see `GISAID `__ for more information.
Column 20: ``authors``
--------------------------------------
@@ -127,7 +127,7 @@ Author of the genome sequence, or the paper which announced this genome. Typical
Column 21: ``url``
--------------------------------------
-The URL, if available, pointing to the genome data. For most SARS-CoV-2 data this is https://www.gisaid.org.
+The URL, if available, pointing to the genome data. For most SARS-CoV-2 data this is https://gisaid.org.
Column 22: ``title``
--------------------------------------
diff --git a/docs/src/reference/remote_inputs.rst b/docs/src/reference/remote_inputs.rst
index c83536571..1303c6eef 100644
--- a/docs/src/reference/remote_inputs.rst
+++ b/docs/src/reference/remote_inputs.rst
@@ -27,7 +27,7 @@ Entire metadata & sequences data is uploaded from the ``ncov-ingest`` workflows
- ``metadata.tsv.zst`` and ``metadata.tsv.gz``
- ``sequences.fasta.zst`` and ``sequences.fasta.xz``
- ``nextclade.tsv.zst`` and ``nextclade.tsv.gz``
-- ``aligned.fasta.zst`` and ``aligned.fasta.xz`` (Alignment via `Nextclade `__. The default reference genome is `MN908947 `__ (Wuhan-Hu-1))
+- ``aligned.fasta.zst`` and ``aligned.fasta.xz`` (Alignment via `Nextclade `__. The default reference genome is `MN908947 `__ (Wuhan-Hu-1))
- ``additional_info.tsv.zst`` and ``additional_info.tsv.gz`` (GISAID only)
- ``flagged_metadata.txt.zst`` and ``flagged_metadata.txt.gz`` (GISAID only)
diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst
index 8b29387cf..23f9ed789 100644
--- a/docs/src/reference/workflow-config-file.rst
+++ b/docs/src/reference/workflow-config-file.rst
@@ -15,6 +15,8 @@ Primary configuration
Parameters in this section define the main inputs and outputs of the workflow, as well as the commonly used ``subsampling`` rule.
Often these will be the only parameters you need to modify.
+.. _inputs:
+
inputs
------
@@ -198,7 +200,7 @@ subsampling
-----------
- type: object
-- description: Schemes for subsampling data prior to phylogenetic inference to avoid sampling bias or focus an analysis on specific spatial and/or temporal scales. `See the SARS-CoV-2 tutorial for more details on defining subsampling schemes <../reference/customizing-analysis.html#subsampling>`__.
+- description: Schemes for subsampling data prior to phylogenetic inference to avoid sampling bias or focus an analysis on specific spatial and/or temporal scales. See the :doc:`genomic surveillance tutorial <../tutorial/genomic-surveillance>` for an example.
Predefined subsampling schemes are:
@@ -400,6 +402,8 @@ strip_strain_prefixes
- description: A list of prefixes to strip from strain names in metadata and sequence records to maintain consistent strain names when analyzing data from multiple sources.
- default: ``["hCoV-19/", "SARS-CoV-2/"]``
+.. _auspice_json_prefix:
+
auspice_json_prefix
-------------------
@@ -595,6 +599,8 @@ Per-Rule configuration
Each top-level parameter here corresponds to a single Snakemake rule.
Note that ``subsampling`` is a commonly used rule configuration which is described separately in the Primary configuration section.
+.. _sanitize_metadata:
+
sanitize_metadata
-----------------
diff --git a/docs/src/tutorial/custom-data.rst b/docs/src/tutorial/custom-data.rst
index 25945e2e4..76eff7e62 100644
--- a/docs/src/tutorial/custom-data.rst
+++ b/docs/src/tutorial/custom-data.rst
@@ -12,7 +12,7 @@ Prerequisites
-------------
1. :doc:`example-data`. This tutorial sets up the command line environment used in the following tutorial.
-2. `Register for a GISAID account `_, if you do not have one yet. However, registration may take a few days. Follow :doc:`alternative data preparation methods <../guides/data-prep/index>` in place of :ref:`custom-data-curate-data-from-gisaid`, if you wish to continue the following tutorial in the meantime.
+2. `Register for a GISAID account `_, if you do not have one yet. However, registration may take a few days. Follow :doc:`alternative data preparation methods <../guides/data-prep/index>` in place of :ref:`custom-data-curate-data-from-gisaid`, if you wish to continue the following tutorial in the meantime.
Setup
-----
@@ -30,7 +30,7 @@ Curate data from GISAID
We will retrieve 10 sequences from GISAID's EpiCoV database.
-1. Navigate to `GISAID `__ and select **Login**.
+1. Navigate to `GISAID `__ and select **Login**.
.. image:: ../images/gisaid-homepage.png
:width: 400
diff --git a/docs/src/tutorial/genomic-surveillance.rst b/docs/src/tutorial/genomic-surveillance.rst
index 6a322c3c9..59f2724b2 100644
--- a/docs/src/tutorial/genomic-surveillance.rst
+++ b/docs/src/tutorial/genomic-surveillance.rst
@@ -15,7 +15,7 @@ Prerequisites
-------------
1. :doc:`custom-data`. This tutorial introduces concepts expanded by the following tutorial.
-2. `Register for a GISAID account `_, if you do not have one yet. However, registration may take a few days. Follow :doc:`alternative data preparation methods <../guides/data-prep/index>` in place of :ref:`genomic-surveillance-curate-data-from-gisaid`, if you wish to continue the following tutorial in the meantime.
+2. `Register for a GISAID account `_, if you do not have one yet. However, registration may take a few days. Follow :doc:`alternative data preparation methods <../guides/data-prep/index>` in place of :ref:`genomic-surveillance-curate-data-from-gisaid`, if you wish to continue the following tutorial in the meantime.
Setup
-----
@@ -33,7 +33,7 @@ Curate data from GISAID
We will download a focal set of Idaho sequences from GISAID's EpiCoV database.
-1. Navigate to `GISAID `__, **Login**, and go to **EpiCoV** > **Search**.
+1. Navigate to `GISAID `__, **Login**, and go to **EpiCoV** > **Search**.
.. image:: ../images/gisaid-epicov-search.png
:width: 400
diff --git a/docs/src/tutorial/next-steps.rst b/docs/src/tutorial/next-steps.rst
index f077384e9..4f4a99d34 100644
--- a/docs/src/tutorial/next-steps.rst
+++ b/docs/src/tutorial/next-steps.rst
@@ -73,7 +73,7 @@ Additional resources
- `An applied genomic epidemiological handbook `__ by Allison Black and Gytis Dudas
- `Genomic Epidemiology Seminar Series `__ by Chan Zuckerberg Initiative Genomic Epidemiology (CZ GEN EPI)
- - `COVID-19 Genomic Epidemiology Toolkit `__ by Centers for Disease Control and Prevention (CDC)
+ - `COVID-19 Genomic Epidemiology Toolkit `__ by Centers for Disease Control and Prevention (CDC)
- :doc:`Review all possible options to configure your SARS-CoV-2 analyses with Nextstrain <../reference/workflow-config-file>`.
- Watch `this 1-hour video overview `__ by Heather Blankenship on how to deploy Nextstrain for a Public Health lab.
diff --git a/docs/src/tutorial/setup.rst b/docs/src/tutorial/setup.rst
index 5c37415fe..3cf885197 100644
--- a/docs/src/tutorial/setup.rst
+++ b/docs/src/tutorial/setup.rst
@@ -10,7 +10,7 @@ Register for a GISAID account
-----------------------------
Some tutorials rely on data downloaded from `GISAID `_.
-If you do not already have one, `register for a GISAID account `_ now.
+If you do not already have one, `register for a GISAID account `_ now.
Registration may take a few days.
Install Nextstrain components
diff --git a/docs/src/visualization/interpretation.rst b/docs/src/visualization/interpretation.rst
index 3eea237ae..3dc62c9c1 100644
--- a/docs/src/visualization/interpretation.rst
+++ b/docs/src/visualization/interpretation.rst
@@ -10,7 +10,7 @@ Introductory resources
- How to interact with Auspice (the engine for viewing trees): https://neherlab.org/201901_krisp_auspice.html
-- Overview of genomic epidemiology (older, but still relevant and clear): http://evolve.zoo.ox.ac.uk/Evolve/Oliver_Pybus_files/EvolAnalysisOfDynamicsOfViruses.pdf
+- Overview of genomic epidemiology (older, but still relevant and clear): https://www.nature.com/articles/nrg2583
Case Studies
------------
diff --git a/docs/src/visualization/narratives.rst b/docs/src/visualization/narratives.rst
index 61a96c653..0e15b5e53 100644
--- a/docs/src/visualization/narratives.rst
+++ b/docs/src/visualization/narratives.rst
@@ -5,7 +5,7 @@ Nextstrain Narratives allow you to pair a specific view of a dataset with text a
For examples, `see our weekly Situation Reports `__ from the first several months of the pandemic.
-You can `read more about narratives `__ or `watch our Nextstrain narratives tutorial videos `_. We've also `provided a template narrative file `__ for you to edit.
+You can `read more about narratives `__ or `watch our Nextstrain narratives tutorial videos `_. We've also `provided a template narrative file `__ for you to edit.
You can preview the template narrative by navigating to https://nextstrain.org/community/narratives/nextstrain/ncov/template/narrative.