Skip to content

Commit

Permalink
minor updates prior to branch switch
Browse files Browse the repository at this point in the history
  • Loading branch information
cmaceves committed Jan 23, 2024
1 parent ff631ba commit 9e8620d
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 142 deletions.
13 changes: 13 additions & 0 deletions src/allele_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@
#include <string>
#include <vector>

std::vector<allele> populate_basic_alleles(){
std::vector<allele> ad;
std::vector<std::string> nts = { "A", "C", "G", "T", "-"};
for(uint32_t i =0; i < nts.size(); i++){
allele tmp;
tmp.depth = 0;
tmp.nuc = nts[i];
ad.push_back(tmp);
}
return(ad);
}

void print_allele_depths(std::vector<allele> ad) {
std::cout << "AD Size: " << ad.size() << " " << std::endl;
for (std::vector<allele>::iterator it = ad.begin(); it != ad.end(); ++it) {
Expand Down Expand Up @@ -56,6 +68,7 @@ void position::update_alleles(std::string nt, uint32_t count, uint32_t qual, boo
tmp.is_ref = ref;
alleles.push_back(tmp);
} else {
alleles[exists].is_ref = ref;
alleles[exists].mean_qual += qual;
alleles[exists].depth += count;
}
Expand Down
3 changes: 2 additions & 1 deletion src/allele_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ struct allele {
uint32_t beg;
uint32_t end;
float tmp_mean_qual;
bool is_ref;
bool is_ref = false;
bool operator<(const allele& a) const {
return (nuc.compare(a.nuc) > 0) ? true : false;
}
Expand Down Expand Up @@ -44,4 +44,5 @@ int find_ref_in_allele(std::vector<allele> ad, char ref);
char gt2iupac(char a, char b);
char codon2aa(char n1, char n2, char n3);
uint32_t sum_allele_depths(std::vector<allele> test);
std::vector<allele> populate_basic_alleles();
#endif
59 changes: 7 additions & 52 deletions src/interval_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ std::vector<allele> add_allele_vectors(std::vector<allele> new_alleles, std::vec
if (return_alleles[j].nuc == new_alleles[i].nuc){
return_alleles[j].depth += new_alleles[i].depth;
return_alleles[j].mean_qual += new_alleles[i].mean_qual;
return_alleles[j].is_ref = new_alleles[i].is_ref;
found = true;
break;
}
Expand All @@ -27,14 +28,11 @@ std::vector<allele> add_allele_vectors(std::vector<allele> new_alleles, std::vec
return(return_alleles);
}


void IntervalTree::combine_haplotypes(ITNode *root){
if (root==NULL) return;
for(uint32_t i=0; i < root->amp_positions.size(); i++){
int exists = check_position_exists(root->amp_positions[i].pos, variants);
//does exist
variants[root->amp_positions[i].pos].depth += root->amp_positions[i].depth;
std::vector<allele> new_alleles = add_allele_vectors(root->amp_positions[i].alleles, variants[exists].alleles);
std::vector<allele> new_alleles = add_allele_vectors(root->amp_positions[i].alleles, variants[root->amp_positions[i].pos].alleles);
variants[root->amp_positions[i].pos].alleles = new_alleles;
}
combine_haplotypes(root->right);
Expand Down Expand Up @@ -70,6 +68,7 @@ void IntervalTree::add_read_variants(uint32_t *cigar, uint32_t start_pos, uint32
uint32_t length = 0;
uint32_t ll = 0;
uint32_t op;
//auto start = high_resolution_clock::now();
for(uint32_t i=0; i < nlength; i++){
op = bam_cigar_op(cigar[i]);
if (bam_cigar_type(op) & 1){
Expand Down Expand Up @@ -155,9 +154,6 @@ void IntervalTree::add_read_variants(uint32_t *cigar, uint32_t start_pos, uint32
uint32_t j = 0;
do {
char character = (char)aux[j];
if(test == qname){
std::cerr << "aux " << aux[j] << " digits " << gather_digits << " current pos " << current_pos << " deletion " << deletion << " is digit " << isdigit(character) << std::endl;
}
if (character == '^'){
current_pos += std::stoi(gather_digits);
gather_digits = "";
Expand All @@ -167,23 +163,14 @@ void IntervalTree::add_read_variants(uint32_t *cigar, uint32_t start_pos, uint32
deletion = false;
gather_digits += character;
} else if (isalpha(character) && deletion) {
int exists = check_position_exists(current_pos, variants);
if (exists != -1) {
variants[exists].update_alleles("-", 1, 0, false);
} else {
//add position to vector
position add_pos;
add_pos.pos = current_pos; //don't add a position
add_pos.update_alleles("-", 1, 0, false);
variants.push_back(add_pos);
}
variants[current_pos].update_alleles("-", 1, 0, false);
deletion_positions.push_back(current_pos);
current_pos += 1;
deleted_char += character;
deletion = true;
} else if (isdigit(character) && !deletion) {
if(substitution){
for(uint32_t z=0; z < sub_char.size(); z++){
for(uint32_t z=1; z < sub_char.size(); z++){
substitutions.push_back(current_pos + z);
}
substitution = false;
Expand All @@ -206,16 +193,6 @@ void IntervalTree::add_read_variants(uint32_t *cigar, uint32_t start_pos, uint32
j++;
} while(aux[j] != '\0');

if(qname == test) {
std::cerr << "deletion pos" << std::endl;
for(uint32_t i=0; i<deletion_positions.size(); i++){
std::cerr << deletion_positions[i] << std::endl;
}
std::cerr << "insertion pos" << std::endl;
for(uint32_t i=0; i < ignore_sequence.size(); i++){
std::cerr << ignore_sequence[i] << std::endl;
}
}
//now that we know where the insertions and deletions are, let's just iterate the query sequence and add it in, skipping problem positions
current_pos = start_pos;
bool prev_insertion = false;
Expand All @@ -238,9 +215,6 @@ void IntervalTree::add_read_variants(uint32_t *cigar, uint32_t start_pos, uint32
i_it = find(seen_insertions.begin(), seen_insertions.end(), current_pos);
//handle insertions
if (it != ignore_sequence.end() && i_it == seen_insertions.end()) {
if(qname == test){
std::cerr << j << " " << current_pos << " insertion" << std::endl;
}
std::ostringstream convert;
convert << sequence[j];
nuc += convert.str();
Expand All @@ -258,37 +232,24 @@ void IntervalTree::add_read_variants(uint32_t *cigar, uint32_t start_pos, uint32
}
it = find(sc_positions.begin(), sc_positions.end(), j);
if (it != sc_positions.end()){
if(qname == test){
std::cerr << "soft clipped" << std::endl;
}
continue;
}
current_pos += 1;
int exists = check_position_exists(current_pos, variants);
std::ostringstream convert;
bool ref = false;
convert << sequence[j];
std::string nuc = convert.str();
if (std::find(substitutions.begin(), substitutions.end(), current_pos) == substitutions.end()){
ref = true;
}
if (nuc != "A" && nuc != "T" && nuc != "C" && nuc != "G" && nuc != "N") {
std::cerr << qname << " " << nuc << std::endl;
}
if (exists != -1) {
variants[exists].update_alleles(nuc, 1, quality[j], ref);
} else {
position add_pos;
add_pos.pos = current_pos; //don't add a position
add_pos.update_alleles(nuc, 1, quality[j], ref);
variants.push_back(add_pos);
}
variants[current_pos].update_alleles(nuc, 1, quality[j], ref);
}
}

void IntervalTree::populate_variants(){
for(uint32_t i=0; i <= max_pos; i++){
position tmp;
tmp.alleles = populate_basic_alleles();
tmp.pos = i;
variants.push_back(tmp);
}
Expand All @@ -315,9 +276,6 @@ void IntervalTree::set_haplotypes(ITNode *root, primer prim){
} else if (strand == '-' && ((int)prim.get_end()+1 != root->data->high)){
set_haplotypes(root->right, prim);
} else {
if(prim.get_start() == 33){
std::cerr << "THIS" << std::endl;
}
// we found the matching amplion, now we add this cigarotype to the amplicon
std::vector<position> tmp_pos = prim.get_positions();
for (uint32_t i=0; i < tmp_pos.size(); i++){
Expand All @@ -329,9 +287,6 @@ void IntervalTree::set_haplotypes(ITNode *root, primer prim){
if(here_pos.pos == tmp_pos[i].pos){
found = true;
root->amp_positions[j].depth += tmp_pos[i].depth;
if(here_pos.pos == 105){
print_allele_depths(tmp_pos[i].alleles);
}
std::vector<allele> new_alleles = add_allele_vectors(tmp_pos[i].alleles, root->amp_positions[j].alleles);
root->amp_positions[j].alleles = new_alleles;
break;
Expand Down
Loading

0 comments on commit 9e8620d

Please sign in to comment.