diff --git a/configure.ac b/configure.ac index 4b46b130..ad994ac1 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.63]) -AC_INIT([ivar], [1.2.4], [gkarthik@scripps.edu]) +AC_INIT([ivar], [1.3], [gkarthik@scripps.edu]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_HEADERS([config.h]) diff --git a/data/pair_info_2.tsv b/data/pair_info_2.tsv new file mode 100644 index 00000000..4277b70e --- /dev/null +++ b/data/pair_info_2.tsv @@ -0,0 +1,110 @@ +unk_x unk_y +nCoV-2019_1_LEFT nCoV-2019_1_RIGHT +nCoV-2019_2_LEFT nCoV-2019_2_RIGHT +nCoV-2019_3_LEFT nCoV-2019_3_RIGHT +nCoV-2019_4_LEFT nCoV-2019_4_RIGHT +nCoV-2019_5_LEFT nCoV-2019_5_RIGHT +nCoV-2019_6_LEFT nCoV-2019_6_RIGHT +nCoV-2019_7_LEFT nCoV-2019_7_RIGHT +nCoV-2019_7_LEFT_alt0 nCoV-2019_7_RIGHT_alt5 +nCoV-2019_8_LEFT nCoV-2019_8_RIGHT +nCoV-2019_9_LEFT nCoV-2019_9_RIGHT +nCoV-2019_9_LEFT_alt4 nCoV-2019_9_RIGHT_alt2 +nCoV-2019_10_LEFT nCoV-2019_10_RIGHT +nCoV-2019_11_LEFT nCoV-2019_11_RIGHT +nCoV-2019_12_LEFT nCoV-2019_12_RIGHT +nCoV-2019_13_LEFT nCoV-2019_13_RIGHT +nCoV-2019_14_LEFT nCoV-2019_14_RIGHT +nCoV-2019_14_LEFT_alt4 nCoV-2019_14_RIGHT_alt2 +nCoV-2019_15_LEFT nCoV-2019_15_RIGHT +nCoV-2019_15_LEFT_alt1 nCoV-2019_15_RIGHT_alt3 +nCoV-2019_16_LEFT nCoV-2019_16_RIGHT +nCoV-2019_17_LEFT nCoV-2019_17_RIGHT +nCoV-2019_18_LEFT nCoV-2019_18_RIGHT +nCoV-2019_18_LEFT_alt2 nCoV-2019_18_RIGHT_alt1 +nCoV-2019_19_LEFT nCoV-2019_19_RIGHT +nCoV-2019_20_LEFT nCoV-2019_20_RIGHT +nCoV-2019_21_LEFT nCoV-2019_21_RIGHT +nCoV-2019_21_LEFT_alt2 nCoV-2019_21_RIGHT_alt0 +nCoV-2019_22_LEFT nCoV-2019_22_RIGHT +nCoV-2019_23_LEFT nCoV-2019_23_RIGHT +nCoV-2019_24_LEFT nCoV-2019_24_RIGHT +nCoV-2019_25_LEFT nCoV-2019_25_RIGHT +nCoV-2019_26_LEFT nCoV-2019_26_RIGHT +nCoV-2019_27_LEFT nCoV-2019_27_RIGHT +nCoV-2019_28_LEFT nCoV-2019_28_RIGHT +nCoV-2019_29_LEFT nCoV-2019_29_RIGHT +nCoV-2019_30_LEFT nCoV-2019_30_RIGHT +nCoV-2019_31_LEFT nCoV-2019_31_RIGHT +nCoV-2019_32_LEFT nCoV-2019_32_RIGHT +nCoV-2019_33_LEFT nCoV-2019_33_RIGHT +nCoV-2019_34_LEFT nCoV-2019_34_RIGHT +nCoV-2019_35_LEFT nCoV-2019_35_RIGHT +nCoV-2019_36_LEFT nCoV-2019_36_RIGHT +nCoV-2019_37_LEFT nCoV-2019_37_RIGHT +nCoV-2019_38_LEFT nCoV-2019_38_RIGHT +nCoV-2019_39_LEFT nCoV-2019_39_RIGHT +nCoV-2019_40_LEFT nCoV-2019_40_RIGHT +nCoV-2019_41_LEFT nCoV-2019_41_RIGHT +nCoV-2019_42_LEFT nCoV-2019_42_RIGHT +nCoV-2019_43_LEFT nCoV-2019_43_RIGHT +nCoV-2019_44_LEFT nCoV-2019_44_RIGHT +nCoV-2019_44_LEFT_alt3 nCoV-2019_44_RIGHT_alt0 +nCoV-2019_45_LEFT nCoV-2019_45_RIGHT +nCoV-2019_45_LEFT_alt2 nCoV-2019_45_RIGHT_alt7 +nCoV-2019_46_LEFT nCoV-2019_46_RIGHT +nCoV-2019_46_LEFT_alt1 nCoV-2019_46_RIGHT_alt2 +nCoV-2019_47_LEFT nCoV-2019_47_RIGHT +nCoV-2019_48_LEFT nCoV-2019_48_RIGHT +nCoV-2019_49_LEFT nCoV-2019_49_RIGHT +nCoV-2019_50_LEFT nCoV-2019_50_RIGHT +nCoV-2019_51_LEFT nCoV-2019_51_RIGHT +nCoV-2019_52_LEFT nCoV-2019_52_RIGHT +nCoV-2019_53_LEFT nCoV-2019_53_RIGHT +nCoV-2019_54_LEFT nCoV-2019_54_RIGHT +nCoV-2019_55_LEFT nCoV-2019_55_RIGHT +nCoV-2019_56_LEFT nCoV-2019_56_RIGHT +nCoV-2019_57_LEFT nCoV-2019_57_RIGHT +nCoV-2019_58_LEFT nCoV-2019_58_RIGHT +nCoV-2019_59_LEFT nCoV-2019_59_RIGHT +nCoV-2019_60_LEFT nCoV-2019_60_RIGHT +nCoV-2019_61_LEFT nCoV-2019_61_RIGHT +nCoV-2019_62_LEFT nCoV-2019_62_RIGHT +nCoV-2019_63_LEFT nCoV-2019_63_RIGHT +nCoV-2019_64_LEFT nCoV-2019_64_RIGHT +nCoV-2019_65_LEFT nCoV-2019_65_RIGHT +nCoV-2019_66_LEFT nCoV-2019_66_RIGHT +nCoV-2019_67_LEFT nCoV-2019_67_RIGHT +nCoV-2019_68_LEFT nCoV-2019_68_RIGHT +nCoV-2019_69_LEFT nCoV-2019_69_RIGHT +nCoV-2019_70_LEFT nCoV-2019_70_RIGHT +nCoV-2019_71_LEFT nCoV-2019_71_RIGHT +nCoV-2019_72_LEFT nCoV-2019_72_RIGHT +nCoV-2019_73_LEFT nCoV-2019_73_RIGHT +nCoV-2019_74_LEFT nCoV-2019_74_RIGHT +nCoV-2019_75_LEFT nCoV-2019_75_RIGHT +nCoV-2019_76_LEFT nCoV-2019_76_RIGHT +nCoV-2019_76_LEFT_alt3 nCoV-2019_76_RIGHT_alt0 +nCoV-2019_77_LEFT nCoV-2019_77_RIGHT +nCoV-2019_78_LEFT nCoV-2019_78_RIGHT +nCoV-2019_79_LEFT nCoV-2019_79_RIGHT +nCoV-2019_80_LEFT nCoV-2019_80_RIGHT +nCoV-2019_81_LEFT nCoV-2019_81_RIGHT +nCoV-2019_82_LEFT nCoV-2019_82_RIGHT +nCoV-2019_83_LEFT nCoV-2019_83_RIGHT +nCoV-2019_84_LEFT nCoV-2019_84_RIGHT +nCoV-2019_85_LEFT nCoV-2019_85_RIGHT +nCoV-2019_86_LEFT nCoV-2019_86_RIGHT +nCoV-2019_87_LEFT nCoV-2019_87_RIGHT +nCoV-2019_88_LEFT nCoV-2019_88_RIGHT +nCoV-2019_89_LEFT nCoV-2019_89_RIGHT +nCoV-2019_89_LEFT_alt2 nCoV-2019_89_RIGHT_alt4 +nCoV-2019_90_LEFT nCoV-2019_90_RIGHT +nCoV-2019_91_LEFT nCoV-2019_91_RIGHT +nCoV-2019_92_LEFT nCoV-2019_92_RIGHT +nCoV-2019_93_LEFT nCoV-2019_93_RIGHT +nCoV-2019_94_LEFT nCoV-2019_94_RIGHT +nCoV-2019_95_LEFT nCoV-2019_95_RIGHT +nCoV-2019_96_LEFT nCoV-2019_96_RIGHT +nCoV-2019_97_LEFT nCoV-2019_97_RIGHT +nCoV-2019_98_LEFT nCoV-2019_98_RIGHT diff --git a/data/test_amplicon.sorted.bam b/data/test_amplicon.sorted.bam new file mode 100644 index 00000000..6ea5285e Binary files /dev/null and b/data/test_amplicon.sorted.bam differ diff --git a/data/test_isize.sorted.bam b/data/test_isize.sorted.bam index c868ac8c..4b2bd2a0 100644 Binary files a/data/test_isize.sorted.bam and b/data/test_isize.sorted.bam differ diff --git a/docs/MANUAL.md b/docs/MANUAL.md index 0874c25d..1cd247cd 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -47,6 +47,8 @@ Usage: ivar trim -i -b -p [-m ] [ Input Options Description -i (Required) Sorted bam file, with aligned reads, to trim primers and quality -b (Required) BED file with primer sequences and positions + -f Primer pair information file containing left and right primer names for the same amplicon separated by a tab + If provided, reads will be filtered based on their overlap with amplicons prior to trimming -m Minimum length of read to retain after trimming (Default: 30) -q Minimum quality threshold for sliding window to pass (Default: 20) -s Width of sliding window (Default: 4) diff --git a/src/Makefile.am b/src/Makefile.am old mode 100644 new mode 100755 index 39876e76..0b55a8db --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,9 +1,9 @@ LIBS = -lhts -lz -lpthread -CXXFLAGS = -g -std=c++11 -Wall -Wextra -Werror +CXXFLAGS = -v -g -std=c++11 -Wall -Wextra -Werror # this lists the binaries to produce, the (non-PHONY, binary) targets in # the previous manual Makefile bin_PROGRAMS = ivar -ivar_SOURCES = ivar.cpp call_consensus_pileup.cpp alignment.cpp suffix_tree.cpp trim_primer_quality.cpp remove_reads_from_amplicon.cpp call_variants.cpp primer_bed.cpp allele_functions.cpp get_masked_amplicons.cpp get_common_variants.cpp parse_gff.cpp ref_seq.cpp +ivar_SOURCES = ivar.cpp call_consensus_pileup.cpp alignment.cpp suffix_tree.cpp trim_primer_quality.cpp remove_reads_from_amplicon.cpp call_variants.cpp primer_bed.cpp allele_functions.cpp get_masked_amplicons.cpp get_common_variants.cpp parse_gff.cpp ref_seq.cpp interval_tree.cpp ivar_LDADD = $(LIBS) diff --git a/src/interval_tree.cpp b/src/interval_tree.cpp new file mode 100755 index 00000000..4906338a --- /dev/null +++ b/src/interval_tree.cpp @@ -0,0 +1,148 @@ +#include "interval_tree.h" + +// Constructor for initializing an Interval Tree +IntervalTree::IntervalTree(){ + _root = NULL; +} + +// A utility function to insert a new Interval Search Tree Node +// This is similar to BST Insert. Here the low value of interval +// is used tomaintain BST property +void IntervalTree::insert(ITNode *root, Interval data){ + // Base case: Tree is empty, new node becomes root + if(root == NULL){ + root = new ITNode(data); + _root = root; + } else { + // Get low value of interval at root + int l = root->data->low; + // If root's low value is greater, then new interval goes to + // left subtree + if (data.low < l){ + if(!root->left){ + ITNode *tmpNode = new ITNode(data); + //std::cout << data.low << ":" << data.high << "->insertLeft" << std::endl; + root->left = tmpNode; + } else { + insert(root->left, data); + } + } + else { + if(!root->right){ + ITNode *tmpNode = new ITNode(data); + //std::cout << data.low << ":" << data.high << "->insertRight" << std::endl; + root->right = tmpNode; + } else { + insert(root->right, data); + } + } + } + // update max value of ancestor node + if(root->max < data.low) + root->max = data.low; +} + + +// A utility function to check if given two intervals overlap +bool doOverlap(Interval i1, Interval i2){ + if(i1.low <= i2.low && i1.high >= i2.high) + return true; + return false; +} + + +// The main function that searches an interval i in a given +// Interval Tree. +bool IntervalTree::overlapSearch(ITNode *root, Interval i){ + // Base Case, tree is empty + //std::cout << root->data->low << ":" << root->data->high << std::endl; + if (root == NULL) return false; + + // If given interval overlaps with root + if (doOverlap(*(root->data), i)) + return true; + + // If left child of root is present and max of left child is + // greater than or equal to given interval, then i may + // overlap with an interval in left subtree + if (root->left != NULL && root->left->max >= i.low) + return overlapSearch(root->left, i); + + // Else interval can only overlap with right subtree + return overlapSearch(root->right, i); +} + +// A helper function for inorder traversal of the tree +void IntervalTree::inOrder(ITNode *root){ + if (root == NULL) return; + inOrder(root->left); + cout << "[" << root->data->low << ", " << root->data->high << "]" + << " max = " << root->max << endl; + inOrder(root->right); +} + +// A stand-alone function to create a tree containing the coordinates of each amplicon +// based on user-specified primer pairs +IntervalTree populate_amplicons(std::string pair_info_file, std::vector primers){ + int amplicon_start = -1; + int amplicon_end = -1; + IntervalTree tree = IntervalTree(); + populate_pair_indices(primers, pair_info_file); + for (auto & p : primers) { + if (p.get_strand() == '+') + { + if (p.get_pair_indice() != -1){ + amplicon_start = p.get_start(); + amplicon_end = primers[p.get_pair_indice()].get_end() + 1; + tree.insert(Interval(amplicon_start, amplicon_end)); + } + } + } + return tree; +} + + +/* +// Simple access functions to retrieve node's interval data +Interval ITNode::getData()const{ +return data; +} +// Simple access functions to retrieve node's left child +ITNode ITNode::getLeft()const{ +return left; +} +// Simple access functions to retrieve node's right child +ITNode ITNode::getRight()const{ +return right; +} +// Simple access functions to set node's left child +void ITNode::setLeft(ITNode *node){ +left = node; +} +// Simple access functions to set node's right child +void ITNode::setRight(ITNode *node){ +right = node; +} + +int main() +{ +Interval ints[6] = {Interval(15, 20), Interval(30, 10), Interval(17, 19), Interval(5, 20), Interval(12, 15), Interval(30, 40)}; +int n = sizeof(ints) / sizeof(ints[0]); +IntervalTree tree = IntervalTree(); +cout << "Hello World" << endl; +// populate interval tree +for (int i = 0; i < n; i++) +{ +tree.insert(ints[i]); +} + +tree.inOrder(); +Interval queries[4] = {Interval(15, 20), Interval(9, 30), Interval(31, 38), Interval(7, 22)}; +int num_tests = sizeof(queries) / sizeof(queries[0]); +for (int i = 0; i < num_tests; i++) +{ +cout << "Does " << queries[i].low << ":" << queries[i].high << " Overlap? " << tree.overlapSearch(queries[i]) << endl; +} +return 0; +} +*/ diff --git a/src/interval_tree.h b/src/interval_tree.h new file mode 100755 index 00000000..224f0c55 --- /dev/null +++ b/src/interval_tree.h @@ -0,0 +1,53 @@ +#include +#include "primer_bed.h" +using namespace std; + +#ifndef interval_tree +#define interval_tree + +// Structure to represent an interval +class Interval{ public: + Interval(int val1, int val2): low(std::min(val1, val2)), high(std::max(val1, val2)) {} // constructor + int low, high; +}; +// Structure to represent a node in Interval Search Tree +class ITNode{ + /* + public: + ITNode(Interval *values): data(value), left(nullptr), right(nullptr) {} // constructor + int max; + // Getters - access member functions + Interval getData()const; + ITNode getLeft()const; + ITNode getRight()const; + // Setters - access member functions + void setLeft(ITNode *node); + void setRight(ITNode *node); + */ +public: + ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.low) {} // constructor + Interval *data; // pointer to node's interval data object + ITNode *left, *right; // pointer to node's left & right child node objects + int max; + +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// IntervalTree class +class IntervalTree{ +private: + ITNode *_root; + void insert(ITNode *root, Interval data); + bool overlapSearch(ITNode *root, Interval data); + void inOrder(ITNode * root); + +public: + IntervalTree(); // constructor + void insert(Interval data){ insert(_root, data);} + bool overlapSearch(Interval data){ return overlapSearch(_root, data);} + void inOrder() {inOrder(_root);} +}; + +IntervalTree populate_amplicons(std::string pair_info_file, std::vector primers); + +#endif diff --git a/src/ivar.cpp b/src/ivar.cpp old mode 100644 new mode 100755 index a3c7a59f..3fbe9412 --- a/src/ivar.cpp +++ b/src/ivar.cpp @@ -16,7 +16,7 @@ #include "suffix_tree.h" #include "get_common_variants.h" -const std::string VERSION = "1.2.4"; +const std::string VERSION = "1.3"; struct args_t { std::string bam; // -i @@ -65,6 +65,8 @@ void print_trim_usage(){ "Input Options Description\n" " -i (Required) Sorted bam file, with aligned reads, to trim primers and quality\n" " -b BED file with primer sequences and positions. If no BED file is specified, only quality trimming will be done.\n" + " -f Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n" + " If provided, reads will be filtered based on their overlap with amplicons prior to trimming\n" " -m Minimum length of read to retain after trimming (Default: 30)\n" " -q Minimum quality threshold for sliding window to pass (Default: 20)\n" " -s Width of sliding window (Default: 4)\n" @@ -163,7 +165,7 @@ void print_version_info(){ "\nPlease raise issues and bug reports at https://github.com/andersen-lab/ivar/\n\n"; } -static const char *trim_opt_str = "i:b:p:m:q:s:ekh?"; +static const char *trim_opt_str = "i:b:f:p:m:q:s:ekh?"; static const char *variants_opt_str = "p:t:q:m:r:g:h?"; static const char *consensus_opt_str = "i:p:q:t:m:n:kh?"; static const char *removereads_opt_str = "i:p:t:b:h?"; @@ -199,7 +201,7 @@ int main(int argc, char* argv[]){ if(i != argc-1) cl_cmd << " "; } - cl_cmd << "\n\0"; + cl_cmd << "\n\0"; std::string cmd(argv[1]); if(cmd.compare("-v") == 0){ print_version_info(); @@ -210,6 +212,7 @@ int main(int argc, char* argv[]){ argv[1] = argv[0]; argv++; argc--; + // ivar trim if (cmd.compare("trim") == 0){ g_args.min_qual = 20; g_args.sliding_window = 4; @@ -217,6 +220,7 @@ int main(int argc, char* argv[]){ g_args.write_no_primers_flag = false; g_args.keep_for_reanalysis = false; g_args.bed = ""; + g_args.primer_pair_file = ""; opt = getopt( argc, argv, trim_opt_str); while( opt != -1 ) { switch( opt ) { @@ -226,6 +230,9 @@ int main(int argc, char* argv[]){ case 'b': g_args.bed = optarg; break; + case 'f': + g_args.primer_pair_file = optarg; + break; case 'p': g_args.prefix = optarg; break; @@ -257,8 +264,10 @@ int main(int argc, char* argv[]){ return -1; } g_args.prefix = get_filename_without_extension(g_args.prefix,".bam"); - res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length); - } else if (cmd.compare("variants") == 0){ + res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file); + } + // ivar variants + else if (cmd.compare("variants") == 0){ g_args.min_qual = 20; g_args.min_threshold = 0.03; g_args.min_depth = 0; @@ -314,7 +323,9 @@ int main(int argc, char* argv[]){ return -1; } res = call_variants_from_plup(std::cin, g_args.prefix, g_args.min_qual, g_args.min_threshold, g_args.min_depth, g_args.ref, g_args.gff); - } else if (cmd.compare("consensus") == 0){ + } + // ivar consensus + else if (cmd.compare("consensus") == 0){ opt = getopt( argc, argv, consensus_opt_str); g_args.seq_id = ""; g_args.min_threshold = 0; @@ -483,7 +494,7 @@ int main(int argc, char* argv[]){ break; case 'f': g_args.primer_pair_file = optarg; - break; + break; case 'p': g_args.prefix = optarg; break; diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp old mode 100644 new mode 100755 index 284ac6f7..7408eac1 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -160,13 +160,13 @@ void print_cigar(uint32_t *cigar, int nlength){ std::cout << std::endl; } -cigar_ primer_trim(bam1_t *r, int32_t new_pos, bool unpaired_rev = false){ +cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_rev = false){ uint32_t *ncigar = (uint32_t*) malloc(sizeof(uint32_t) * (r->core.n_cigar + 1)), // Maximum edit is one more element with soft mask *cigar = bam_get_cigar(r); uint32_t i = 0, j = 0; int max_del_len = 0, cig, temp, del_len = 0; bool reverse = false; - if((r->core.flag&BAM_FPAIRED) != 0 && !(abs(r->core.isize) <= abs(r->core.l_qseq))){ // If paired and isize > read length + if((r->core.flag&BAM_FPAIRED) != 0 && isize_flag){ // If paired and isize > read length if (bam_is_rev(r)){ // If -ve strand (?) max_del_len = bam_cigar2qlen(r->core.n_cigar, bam_get_cigar(r)) - get_pos_on_query(cigar, r->core.n_cigar, new_pos, r->core.pos) - 1; reverse_cigar(cigar, r->core.n_cigar); @@ -235,9 +235,9 @@ cigar_ primer_trim(bam1_t *r, int32_t new_pos, bool unpaired_rev = false){ } return { ncigar, - true, - j, - start_pos + true, + j, + start_pos }; } @@ -320,9 +320,36 @@ void add_pg_line_to_header(bam_hdr_t** hdr, char *cmd){ (*hdr)->l_text = len-1; } -int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30) { +// get the length of the longest primer +int get_bigger_primer(std::vector primers){ + int max_primer_len = 0; + for (auto & p : primers) { + if(max_primer_len < p.get_length()){ + max_primer_len = p.get_length(); + } + } + return max_primer_len; +} + +// check if read overlaps with any of the amplicons +bool amplicon_filter(IntervalTree amplicons, bam1_t* r){ + Interval fragment_coords = Interval(0, 1); + if(r->core.isize > 0){ + fragment_coords.low = r->core.pos; + fragment_coords.high = r->core.pos + r->core.isize; + } else { + fragment_coords.low = bam_endpos(r) + r->core.isize; + fragment_coords.high = bam_endpos(r); + } + // debugging + bool amplicon_flag = amplicons.overlapSearch(fragment_coords); + return amplicon_flag; +} + +int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "") { int retval = 0; std::vector primers; + int max_primer_len = 0; if(!bed.empty()){ primers = populate_from_file(bed); if(primers.size() == 0){ @@ -330,6 +357,12 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, return -1; } } + max_primer_len = get_bigger_primer(primers); + // get coordinates of each amplicon + IntervalTree amplicons; + if(!pair_info.empty()){ + amplicons = populate_amplicons(pair_info, primers); + } if(bam.empty()){ std::cout << "Bam file is empty." << std::endl; return -1; @@ -409,8 +442,11 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, init_cigar(&t); uint32_t primer_trim_count = 0, no_primer_counter = 0, low_quality = 0; bool unmapped_flag = false; + bool amplicon_flag = false; + bool isize_flag = true; uint32_t failed_frag_size = 0; uint32_t unmapped_counter = 0; + uint32_t amplicon_flag_ctr = 0; primer cand_primer; std::vector overlapping_primers; std::vector::iterator cit; @@ -419,27 +455,36 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, while(sam_itr_next(in, iter, aln) >= 0) { unmapped_flag = false; primer_trimmed = false; + get_overlapping_primers(aln, primers, overlapping_primers); if((aln->core.flag&BAM_FUNMAP) == 0){ // If mapped - if((aln->core.flag&BAM_FPAIRED) != 0 && !(abs(aln->core.isize) <= abs(aln->core.l_qseq))){ // If paired - /* - if((abs(aln->core.isize) <= abs(aln->core.l_qseq))){ - failed_frag_size++; + // if primer pair info provided, check if read correctly overlaps with atleast one amplicon + if(!pair_info.empty()){ + amplicon_flag = amplicon_filter(amplicons, aln); + if(!amplicon_flag){ + if (keep_for_reanalysis) { // -k (keep) option + aln->core.flag |= BAM_FQCFAIL; + if (bam_write1(out, aln) < 0) { retval = -1; goto error; } + } + amplicon_flag_ctr++; continue; - } - */ + } + } + isize_flag = (abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq); + // if reverse strand + if((aln->core.flag&BAM_FPAIRED) != 0 && isize_flag){ // If paired get_overlapping_primers(aln, primers, overlapping_primers); if(overlapping_primers.size() > 0){ // If read starts before overlapping regions (?) primer_trimmed = true; - if(bam_is_rev(aln)){ // Reverse + if(bam_is_rev(aln)){ // Reverse read cand_primer = get_min_start(overlapping_primers); // fetch reverse primer (?) - t = primer_trim(aln, cand_primer.get_start() - 1, false); - } else { // Forward + t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, false); + } else { // Forward read cand_primer = get_max_end(overlapping_primers); // fetch forward primer (?) - t = primer_trim(aln, cand_primer.get_end() + 1, false); + t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false); aln->core.pos += t.start_pos; } replace_cigar(aln, t.nlength, t.cigar); - free(t.cigar); + free(t.cigar); // Add count to primer cit = std::find(primers.begin(), primers.end(), cand_primer); if(cit != primers.end()) @@ -460,8 +505,8 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, if(overlapping_primers.size() > 0){ primer_trimmed = true; cand_primer = get_max_end(overlapping_primers); - t = primer_trim(aln, cand_primer.get_end() + 1, false); - // Update read's left-most coordinate (?) + t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false); + // Update read's left-most coordinate aln->core.pos += t.start_pos; replace_cigar(aln, t.nlength, t.cigar); // Add count to primer @@ -474,7 +519,7 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, if(overlapping_primers.size() > 0){ primer_trimmed = true; cand_primer = get_min_start(overlapping_primers); - t = primer_trim(aln, cand_primer.get_start() - 1, true); + t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, true); replace_cigar(aln, t.nlength, t.cigar); // Add count to primer cit = std::find(primers.begin(), primers.end(), cand_primer); @@ -482,6 +527,8 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, cit->add_read_count(1); } t = quality_trim(aln, min_qual, sliding_window); // Quality Trimming + if(bam_is_rev(aln)) // if reverse strand + aln->core.pos = t.start_pos; condense_cigar(&t); replace_cigar(aln, t.nlength, t.cigar); } @@ -557,14 +604,17 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, if(unmapped_counter > 0){ std::cout << unmapped_counter << " unmapped reads were not written to file." << std::endl; } + if(amplicon_flag_ctr > 0){ + std::cout << amplicon_flag_ctr << " reads were ignored because they did not fall within an amplicon" << std::endl; + } if(failed_frag_size > 0){ - std::cout << round_int(failed_frag_size, mapped) - << "% (" << failed_frag_size - << ") of reads had their insert size smaller than their read length" + std::cout << round_int(failed_frag_size, mapped) + << "% (" << failed_frag_size + << ") of reads had their insert size smaller than their read length" << std::endl; } -error: + error: if (retval) std::cout << "Not able to write to BAM" << std::endl; hts_itr_destroy(iter); hts_idx_destroy(idx); diff --git a/src/trim_primer_quality.h b/src/trim_primer_quality.h old mode 100644 new mode 100755 index c77ab08b..dd08ba0b --- a/src/trim_primer_quality.h +++ b/src/trim_primer_quality.h @@ -13,6 +13,7 @@ #include #include "primer_bed.h" +#include "interval_tree.h" #ifndef trim_primer_quality #define trim_primer_quality @@ -30,7 +31,7 @@ inline void free_cigar(cigar_ t) { if (t.free_cig) free(t.cigar); } void add_pg_line_to_header(bam_hdr_t** hdr, char *cmd); -int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length); +int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info); void free_cigar(cigar_ t); int32_t get_pos_on_query(uint32_t *cigar, uint32_t ncigar, int32_t pos, int32_t ref_start); int32_t get_pos_on_reference(uint32_t *cigar, uint32_t ncigar, uint32_t pos, uint32_t ref_start); @@ -39,10 +40,12 @@ void reverse_cigar(uint32_t *cigar, int l); double mean_quality(uint8_t *a, int s, int e); cigar_ quality_trim(bam1_t* r, uint8_t qual_threshold, uint8_t sliding_window); void print_cigar(uint32_t *cigar, int nlength); -cigar_ primer_trim(bam1_t *r, int32_t new_pos, bool unpaired_rev); +cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_rev); void replace_cigar(bam1_t *b, uint32_t n, uint32_t *cigar); void condense_cigar(cigar_ *t); void get_overlapping_primers(bam1_t* r, std::vector primers, std::vector &overlapping_primers); void get_overlapping_primers(bam1_t* r, std::vector primers, std::vector &overlapping_primers, bool unpaired_rev); +int get_bigger_primer(std::vector primers); +bool amplicon_filter(IntervalTree amplicons, bam1_t* r); #endif diff --git a/tests/Makefile.am b/tests/Makefile.am old mode 100644 new mode 100755 index 5497d956..af2527db --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -2,11 +2,11 @@ LIBS = -lhts -lz -lpthread CXXFLAGS = -g -std=c++11 -Wall -Wextra -Werror -TESTS = check_primer_trim check_trim check_quality_trim check_consensus check_allele_depth check_consensus_threshold check_consensus_min_depth check_consensus_seq_id check_primer_bed check_getmasked check_removereads check_variants check_common_variants check_unpaired_trim check_primer_trim_edge_cases check_isize_trim -check_PROGRAMS = check_primer_trim check_trim check_quality_trim check_consensus check_allele_depth check_consensus_threshold check_consensus_min_depth check_consensus_seq_id check_primer_bed check_getmasked check_removereads check_variants check_common_variants check_unpaired_trim check_primer_trim_edge_cases check_isize_trim -check_primer_trim_SOURCES = test_primer_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp -check_trim_SOURCES = test_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp -check_quality_trim_SOURCES = check_quality_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp +TESTS = check_primer_trim check_trim check_quality_trim check_consensus check_allele_depth check_consensus_threshold check_consensus_min_depth check_consensus_seq_id check_primer_bed check_getmasked check_removereads check_variants check_common_variants check_unpaired_trim check_primer_trim_edge_cases check_isize_trim check_interval_tree check_amplicon_search +check_PROGRAMS = check_primer_trim check_trim check_quality_trim check_consensus check_allele_depth check_consensus_threshold check_consensus_min_depth check_consensus_seq_id check_primer_bed check_getmasked check_removereads check_variants check_common_variants check_unpaired_trim check_primer_trim_edge_cases check_isize_trim check_interval_tree check_amplicon_search +check_primer_trim_SOURCES = test_primer_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp +check_trim_SOURCES = test_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp +check_quality_trim_SOURCES = check_quality_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp check_consensus_SOURCES = test_call_consensus_from_plup.cpp ../src/call_consensus_pileup.cpp ../src/allele_functions.cpp check_allele_depth_SOURCES = test_allele_depth.cpp ../src/allele_functions.cpp check_consensus_threshold_SOURCES = test_consensus_threshold.cpp ../src/call_consensus_pileup.cpp ../src/allele_functions.cpp @@ -16,7 +16,9 @@ check_variants_SOURCES = test_variants.cpp ../src/call_variants.cpp ../src/allel check_common_variants_SOURCES = test_common_variants.cpp ../src/get_common_variants.cpp check_primer_bed_SOURCES = test_primer_bed.cpp ../src/primer_bed.cpp check_getmasked_SOURCES = test_getmasked.cpp ../src/get_masked_amplicons.cpp ../src/primer_bed.cpp -check_removereads_SOURCES = test_removereads.cpp ../src/remove_reads_from_amplicon.cpp ../src/primer_bed.cpp ../src/trim_primer_quality.cpp -check_unpaired_trim_SOURCES = test_unpaired_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp -check_primer_trim_edge_cases_SOURCES = test_primer_trim_edge_cases.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp -check_isize_trim_SOURCES = test_isize_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp \ No newline at end of file +check_removereads_SOURCES = test_removereads.cpp ../src/remove_reads_from_amplicon.cpp ../src/primer_bed.cpp ../src/trim_primer_quality.cpp ../src/interval_tree.cpp +check_unpaired_trim_SOURCES = test_unpaired_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp +check_primer_trim_edge_cases_SOURCES = test_primer_trim_edge_cases.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp +check_isize_trim_SOURCES = test_isize_trim.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp +check_interval_tree_SOURCES = test_interval_tree.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp +check_amplicon_search_SOURCES = test_amplicon_search.cpp ../src/trim_primer_quality.cpp ../src/primer_bed.cpp ../src/interval_tree.cpp \ No newline at end of file diff --git a/tests/check_quality_trim.cpp b/tests/check_quality_trim.cpp old mode 100644 new mode 100755 diff --git a/tests/test_allele_depth.cpp b/tests/test_allele_depth.cpp old mode 100644 new mode 100755 diff --git a/tests/test_amplicon_search.cpp b/tests/test_amplicon_search.cpp new file mode 100755 index 00000000..5bd705a6 --- /dev/null +++ b/tests/test_amplicon_search.cpp @@ -0,0 +1,47 @@ +#include +#include + +#include "../src/trim_primer_quality.h" +#include "../src/primer_bed.h" +#include "../src/interval_tree.h" + +int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, bool expected[]) +{ + std::vector primers; + IntervalTree amplicons; + primers = populate_from_file(bed_file); + std::cout << "Total Number of primers: " << primers.size() << std::endl; + amplicons = populate_amplicons(pair_info_file, primers); + samFile *in = hts_open(bam_file.c_str(), "r"); + sam_hdr_t *hdr = sam_hdr_read(in); + bam1_t *aln = bam_init1(); + int cnt = 0; + int res = 0; + while(sam_read1(in, hdr, aln) >= 0) { + res = amplicon_filter(amplicons, aln); + if(res != expected[cnt]){ + std::cout << "Amplicon filter output did not match " << bam_get_qname(aln) << " Expected " << expected[cnt] << " " + << "Got " << res << std::endl; + std::cout << "Read intervals: " << aln->core.pos << ":" << bam_endpos(aln) << std::endl; + return -1; + } + cnt++; + } + return 0; +} + + +int main(){ + int success; + std::string bam = "../data/test_amplicon.sorted.bam"; + std::string pair_indice = "../data/pair_info_2.tsv"; + std::string bed = "../data/test_isize.bed"; + std::string prefix = "/tmp/data/trim_amplicon"; + std::string bam_out = "/tmp/trim_amplicon.bam"; + + IntervalTree amplicons; + bool expected[8] = {true, false, true, false, true, false, true, false}; + success = test_amplicon_search(bam, bed, pair_indice, expected); + amplicons.inOrder(); + return success; +} diff --git a/tests/test_call_consensus_from_plup.cpp b/tests/test_call_consensus_from_plup.cpp old mode 100644 new mode 100755 diff --git a/tests/test_common_variants.cpp b/tests/test_common_variants.cpp old mode 100644 new mode 100755 diff --git a/tests/test_consensus_min_depth.cpp b/tests/test_consensus_min_depth.cpp old mode 100644 new mode 100755 diff --git a/tests/test_consensus_seq_id.cpp b/tests/test_consensus_seq_id.cpp old mode 100644 new mode 100755 diff --git a/tests/test_consensus_threshold.cpp b/tests/test_consensus_threshold.cpp old mode 100644 new mode 100755 diff --git a/tests/test_getmasked.cpp b/tests/test_getmasked.cpp old mode 100644 new mode 100755 diff --git a/tests/test_interval_tree.cpp b/tests/test_interval_tree.cpp new file mode 100755 index 00000000..06214ed6 --- /dev/null +++ b/tests/test_interval_tree.cpp @@ -0,0 +1,41 @@ +#include +#include "../src/primer_bed.h" +#include "../src/interval_tree.h" + + +int test_itree_overlap(IntervalTree tree, Interval queries[], int num_tests, bool expected[]){ + int result = 0; + for (int i = 0; i < num_tests; i++) + { + result = tree.overlapSearch(queries[i]); + if (result != expected[i]) + { + std::cout << "Interval Tree overlap behavior incorrect for interval " << queries[i].low << ":" << queries[i].high + << " - " << "Expected: " << expected[i] << "Got: " << result << std::endl; + return 1; + } + } + return 0; +} + + +int main() +{ + int result = 0; + Interval ints[6] = {Interval(15, 20), Interval(30, 10), Interval(17, 19), Interval(5, 20), Interval(12, 15), Interval(30, 40)}; + int n = sizeof(ints) / sizeof(ints[0]); + IntervalTree tree = IntervalTree(); + // populate interval tree + for (int i = 0; i < n; i++) + { + tree.insert(ints[i]); + } + std::cout << "In order traversal of Interval Tree:" << std::endl; + tree.inOrder(); + + Interval queries[4] = {Interval(15, 20), Interval(9, 30), Interval(31, 38), Interval(7, 22)}; + bool expected[4] = {true, false, true, false}; + int num_tests = sizeof(queries) / sizeof(queries[0]); + result = test_itree_overlap(tree, queries, num_tests, expected); + return result; +} \ No newline at end of file diff --git a/tests/test_isize_trim.cpp b/tests/test_isize_trim.cpp old mode 100644 new mode 100755 index f7f0e006..6deaf1fa --- a/tests/test_isize_trim.cpp +++ b/tests/test_isize_trim.cpp @@ -1,6 +1,5 @@ #include #include "../src/trim_primer_quality.h" -#include "../src/primer_bed.h" #include "htslib/sam.h" /* @@ -11,7 +10,7 @@ typedef struct { */ -int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool keep_for_reanalysis, int min_length, std::string testname, uint8_t cigar_flag[12][3], uint32_t cigar_len[12][3]){ +int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool keep_for_reanalysis, int min_length, std::string testname, uint8_t cigar_flag[14][3], uint32_t cigar_len[14][3]){ int success = 0; int res; @@ -21,15 +20,16 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag std::string bam = "../data/test_isize.sorted.bam"; std::string bed = "../data/test_isize.bed"; - std::string prefix = "/tmp/trim_isize"; - std::string bam_out = "/tmp/trim_isize.bam"; + std::string pair_info = ""; + std::string prefix = "../data/trim_isize"; + std::string bam_out = "../data/trim_isize.bam"; std::string region_ = ""; std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n"; // Test and check result - res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length); + res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info); if (res) { success = -1; @@ -79,7 +79,7 @@ int main() { int success = 0; int test_res = 0; - uint8_t cigar_flag[12][3] = { + uint8_t cigar_flag[14][3] = { {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CSOFT_CLIP}, {BAM_CSOFT_CLIP, BAM_CMATCH}, {BAM_CSOFT_CLIP, BAM_CMATCH}, @@ -91,10 +91,12 @@ int main() { {BAM_CMATCH, BAM_CSOFT_CLIP}, {BAM_CMATCH, BAM_CSOFT_CLIP}, {BAM_CMATCH, BAM_CSOFT_CLIP}, + {BAM_CMATCH, BAM_CSOFT_CLIP}, + {BAM_CMATCH, BAM_CSOFT_CLIP}, {BAM_CMATCH, BAM_CSOFT_CLIP} }; - uint32_t cigar_len[12][3] = { + uint32_t cigar_len[14][3] = { {24, 363, 24}, {24, 155}, {24, 155}, @@ -106,7 +108,9 @@ int main() { {156, 24}, {156, 24}, {64, 24}, - {64, 24} + {64, 24}, + {137, 10}, + {137, 11} }; test_res = test_isize_trim(20, 4, false, false, 30, "default paramaters", cigar_flag, cigar_len); diff --git a/tests/test_primer_bed.cpp b/tests/test_primer_bed.cpp old mode 100644 new mode 100755 diff --git a/tests/test_primer_trim.cpp b/tests/test_primer_trim.cpp old mode 100644 new mode 100755 index 6697468c..4bede7cf --- a/tests/test_primer_trim.cpp +++ b/tests/test_primer_trim.cpp @@ -1,5 +1,6 @@ -#include +#include #include "../src/trim_primer_quality.h" +#include "../src/interval_tree.h" #include "../src/primer_bed.h" #include "htslib/sam.h" @@ -7,6 +8,8 @@ int main(){ int success = 0; std::string bam = "../data/test.unmapped.sorted.bam"; std::vector primers = populate_from_file("../data/test.bed"); + int max_primer_len = 0; + max_primer_len = get_bigger_primer(primers); std::string region_; samFile *in = hts_open(bam.c_str(), "r"); hts_idx_t *idx = sam_index_load(in, bam.c_str()); @@ -61,10 +64,12 @@ int main(){ int ctr = 0; std::vector overlapping_primers; primer cand_primer; + bool isize_flag = false; while(sam_itr_next(in, iter, aln) >= 0) { if((aln->core.flag&BAM_FUNMAP) != 0){ continue; } + isize_flag = (abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq); std::cout << bam_get_qname(aln) << std::endl; get_overlapping_primers(aln, primers, overlapping_primers); if(overlapping_primers.size() != overlapping_primer_sizes[ctr]){ @@ -75,10 +80,10 @@ int main(){ print_cigar(bam_get_cigar(aln), aln->core.n_cigar); if(bam_is_rev(aln)){ cand_primer = get_min_start(overlapping_primers); - t = primer_trim(aln, cand_primer.get_start() - 1, false); + t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, false); } else { cand_primer = get_max_end(overlapping_primers); - t = primer_trim(aln, cand_primer.get_end() + 1, false); + t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false); aln->core.pos += t.start_pos; } if(cand_primer.get_indice() != primer_indices[primer_ctr]){ diff --git a/tests/test_primer_trim_edge_cases.cpp b/tests/test_primer_trim_edge_cases.cpp old mode 100644 new mode 100755 index 729e44e1..a1da782b --- a/tests/test_primer_trim_edge_cases.cpp +++ b/tests/test_primer_trim_edge_cases.cpp @@ -5,6 +5,8 @@ int main(){ int success = 0; std::string bam = "../data/primer_only/primer_edge_cases.bam"; std::vector primers = populate_from_file("../data/test.bed"); + int max_primer_len = 0; + max_primer_len = get_bigger_primer(primers); std::string region_; samFile *in = hts_open(bam.c_str(), "r"); hts_idx_t *idx = sam_index_load(in, bam.c_str()); @@ -33,10 +35,12 @@ int main(){ int len = 0; std::vector overlapping_primers; primer cand_primer; + bool isize_flag = false; while(sam_itr_next(in, iter, aln) >= 0) { if((aln->core.flag&BAM_FUNMAP) != 0){ continue; } + isize_flag = (abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq); len = bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln)); std::cout << bam_get_qname(aln) << std::endl; print_cigar(bam_get_cigar(aln), aln->core.n_cigar); @@ -44,7 +48,7 @@ int main(){ if(overlapping_primers.size() > 0){ // Forward trim cand_primer = get_max_end(overlapping_primers); - t = primer_trim(aln, cand_primer.get_end() + 1, false); + t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false); aln->core.pos += t.start_pos; // Replace cigar replace_cigar(aln, t.nlength, t.cigar); @@ -56,7 +60,7 @@ int main(){ get_overlapping_primers(aln, primers, overlapping_primers, true); if(overlapping_primers.size() > 0){ cand_primer = get_min_start(overlapping_primers); - t = primer_trim(aln, cand_primer.get_start() - 1, true); + t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, true); aln->core.pos += t.start_pos; // Replace cigar replace_cigar(aln, t.nlength, t.cigar); diff --git a/tests/test_ref_seq.cpp b/tests/test_ref_seq.cpp old mode 100644 new mode 100755 diff --git a/tests/test_removereads.cpp b/tests/test_removereads.cpp old mode 100644 new mode 100755 diff --git a/tests/test_trim.cpp b/tests/test_trim.cpp old mode 100644 new mode 100755 index eef8286b..e0673557 --- a/tests/test_trim.cpp +++ b/tests/test_trim.cpp @@ -1,5 +1,6 @@ #include #include "../src/trim_primer_quality.h" +#include "../src/interval_tree.h" #include "../src/primer_bed.h" #include "htslib/sam.h" @@ -19,6 +20,7 @@ int test_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool std::string bam = "../data/test.unmapped.sorted.bam"; std::string bed = "../data/test.bed"; + std::string pair_info = ""; std::string prefix = "/tmp/trim"; std::string bam_out = "/tmp/trim.bam"; @@ -26,7 +28,7 @@ int test_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n"; // Test and check result - res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length); + res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info); if (res) { success = -1; std::cerr << testname << " failed: trim_bam_qual_primer() returned " << res << std::endl; diff --git a/tests/test_unpaired_trim.cpp b/tests/test_unpaired_trim.cpp old mode 100644 new mode 100755 index adb7c4e6..c4451167 --- a/tests/test_unpaired_trim.cpp +++ b/tests/test_unpaired_trim.cpp @@ -1,12 +1,15 @@ #include #include "../src/trim_primer_quality.h" #include "../src/primer_bed.h" +#include "../src/interval_tree.h" #include "htslib/sam.h" int main(){ int success = 0; std::string bam = "../data/test.sim.merged.sorted.bam"; std::vector primers = populate_from_file("../data/test_merged.bed"); + int max_primer_len = 0; + max_primer_len = get_bigger_primer(primers); std::string region_; samFile *in = hts_open(bam.c_str(), "r"); hts_idx_t *idx = sam_index_load(in, bam.c_str()); @@ -73,17 +76,19 @@ int main(){ unsigned int rev_overlapping_primer_sizes[] = {2, 0, 0, 0, 1}; std::vector overlapping_primers; primer cand_primer; + bool isize_flag = false; while(sam_itr_next(in, iter, aln) >= 0) { if((aln->core.flag&BAM_FUNMAP) != 0){ continue; } + isize_flag = (abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq); std::cout << bam_get_qname(aln) << std::endl; std::cout << std::endl << "Forward" << std::endl; get_overlapping_primers(aln, primers, overlapping_primers, false); // Forward primer if(overlapping_primers.size() > 0){ cand_primer = get_max_end(overlapping_primers); - t = primer_trim(aln, cand_primer.get_end() + 1, false); + t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false); aln->core.pos += t.start_pos; replace_cigar(aln, t.nlength, t.cigar); if(overlapping_primers.size() != fwd_overlapping_primer_sizes[primer_ctr]){ @@ -101,7 +106,7 @@ int main(){ if(overlapping_primers.size() > 0){ cand_primer = get_min_start(overlapping_primers); free_cigar(t); - t = primer_trim(aln, cand_primer.get_start() - 1, true); + t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, true); replace_cigar(aln, t.nlength, t.cigar); if(overlapping_primers.size() != rev_overlapping_primer_sizes[primer_ctr]){ success = -1; diff --git a/tests/test_variants.cpp b/tests/test_variants.cpp old mode 100644 new mode 100755