Skip to content

Commit

Permalink
WIP -- feat: port over clinvar tests (#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Mar 2, 2023
1 parent 6873324 commit fb34571
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 4 deletions.
125 changes: 125 additions & 0 deletions src/mapper/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,131 @@ mod test {

Ok(())
}

/// The following is a port of `test_clinvar.py`.
mod clinvar {
use std::io::BufReader;
use std::str::FromStr;

use std::ops::Deref;

use flate2::read::GzDecoder;

use crate::parser::HgvsVariant;

use super::build_mapper;

#[test]
fn run() -> Result<(), anyhow::Error> {
let path = "tests/data/mapper/clinvar.gz";
let f = std::fs::File::open(path)?;
let decoder = GzDecoder::new(f);
let rdr = BufReader::new(decoder);
let mut rdr = csv::ReaderBuilder::new()
.delimiter(b'\t')
.has_headers(true)
.comment(Some(b'#'))
.flexible(true)
.from_reader(rdr);

for row in rdr.records() {
let row = row?;

let gene = row.get(0).unwrap();
let variation_id = row.get(1).unwrap();
let hgvs_variants: Result<Vec<_>, _> = row
.get(2)
.unwrap()
.split(' ')
.into_iter()
.map(|s| HgvsVariant::from_str(s))
.collect();
let hgvs_variants = hgvs_variants?
.into_iter()
.filter(|v| !v.accession().deref().starts_with("NG"))
.collect::<Vec<_>>();

cross_check(hgvs_variants, gene, variation_id)?;
}

Ok(())
}

fn cross_check(
hgvs_variants: Vec<HgvsVariant>,
gene: &str,
variation_id: &str,
) -> Result<(), anyhow::Error> {
let mapper = build_mapper()?;

if variation_id == "18176" {
return Ok(())
}

let mut binned_g = Vec::new();
let mut binned_t = Vec::new();
let mut binned_c = Vec::new();
let mut binned_p = Vec::new();

for variant in hgvs_variants {
match &variant {
HgvsVariant::GenomeVariant { .. } => binned_g.push(variant),
HgvsVariant::CdsVariant { .. } => {
binned_c.push(variant.clone());
binned_t.push(variant)
}
HgvsVariant::TxVariant { .. } => binned_t.push(variant),
HgvsVariant::ProtVariant { .. } => binned_p.push(variant),
_ => (),
}
}

// g -> t: for each g., map to each transcript accession.
for var_g in &binned_g {
for var_t in &binned_t {
let res = mapper.g_to_t(&var_g, &var_t.accession(), "splign");
assert!(
res.is_ok(),
"var_g={}, var_t={}, gene={}, variation_id={} -- {:?}",
&var_g,
&var_t,
&gene,
&variation_id,
&res
);

let res = res.unwrap();
assert_eq!(
format!("{}", &var_t),
format!("{}", &res),
"g_to_t({}, {}) = {} but expected {}, gene={}, variation_id={}",
var_g,
var_t.accession(),
&res,
&var_t,
&gene,
&variation_id,
);
}
}

// t -> g: for each t., map to each genomic accession.
// for var_t in &binned_t {
// for var_g in &binned_g {

// }
// }

// c -> p: for each c., map to a protein variant and check whether it's in result set.
// for var_p in &binned_p {
// for var_c in &binned_c {

// }
// }

Ok(())
}
}
}

// <LICENSE>
Expand Down
1 change: 1 addition & 0 deletions tests/data/data/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ set -e

# Augment list of genes to fetch.
GENES="$GENES $(cut -f 1 tests/data/mapper/real_cp.tsv | tail -n +2)"
GENES="$GENES $(zcat tests/data/mapper/clinvar.gz | grep -v ^# | cut -f 1 | tail -n +2 | sort -u)"
# Transform gene list for postgres query.
PG_GENES=$(pg-list $GENES)
Expand Down
14 changes: 14 additions & 0 deletions tests/data/data/subset.awk
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ BEGIN {
next; # skip for testing
}

# We don't need any of the materialized views.
if ($0 ~ /REFRESH/ || $0 ~ /CREATE.*INDEX .*_mv/) {
print "-- " $0;
next;
} else if ($0 ~ /MATERIALIZED VIEW/) {
gsub(/MATERIALIZED VIEW/, "VIEW", $0);
print $0;
next;
} else if ($0 ~ /WITH NO DATA/) {
gsub(/WITH NO DATA/, "", $0);
print $0;
next;
}

if ($0 ~ /^COPY/) {
table_name = $2;
gsub(/.*?\./, "", table_name);
Expand Down
4 changes: 2 additions & 2 deletions tests/data/data/uta_20210129-subset.pgd.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/data/mapper/clinvar.gz
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/data/seqrepo_cache.fasta
Git LFS file not shown

0 comments on commit fb34571

Please sign in to comment.