Skip to content

Commit

Permalink
Merge pull request #80 from andersen-lab/amplicon_search
Browse files Browse the repository at this point in the history
Amplicon search
  • Loading branch information
gkarthik authored Nov 6, 2020
2 parents 29cf79d + eca96e7 commit 98c2394
Show file tree
Hide file tree
Showing 31 changed files with 548 additions and 61 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.63])
AC_INIT([ivar], [1.2.4], [[email protected]])
AC_INIT([ivar], [1.3], [[email protected]])
AM_INIT_AUTOMAKE([foreign subdir-objects])
AC_CONFIG_HEADERS([config.h])

Expand Down
110 changes: 110 additions & 0 deletions data/pair_info_2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
unk_x unk_y
nCoV-2019_1_LEFT nCoV-2019_1_RIGHT
nCoV-2019_2_LEFT nCoV-2019_2_RIGHT
nCoV-2019_3_LEFT nCoV-2019_3_RIGHT
nCoV-2019_4_LEFT nCoV-2019_4_RIGHT
nCoV-2019_5_LEFT nCoV-2019_5_RIGHT
nCoV-2019_6_LEFT nCoV-2019_6_RIGHT
nCoV-2019_7_LEFT nCoV-2019_7_RIGHT
nCoV-2019_7_LEFT_alt0 nCoV-2019_7_RIGHT_alt5
nCoV-2019_8_LEFT nCoV-2019_8_RIGHT
nCoV-2019_9_LEFT nCoV-2019_9_RIGHT
nCoV-2019_9_LEFT_alt4 nCoV-2019_9_RIGHT_alt2
nCoV-2019_10_LEFT nCoV-2019_10_RIGHT
nCoV-2019_11_LEFT nCoV-2019_11_RIGHT
nCoV-2019_12_LEFT nCoV-2019_12_RIGHT
nCoV-2019_13_LEFT nCoV-2019_13_RIGHT
nCoV-2019_14_LEFT nCoV-2019_14_RIGHT
nCoV-2019_14_LEFT_alt4 nCoV-2019_14_RIGHT_alt2
nCoV-2019_15_LEFT nCoV-2019_15_RIGHT
nCoV-2019_15_LEFT_alt1 nCoV-2019_15_RIGHT_alt3
nCoV-2019_16_LEFT nCoV-2019_16_RIGHT
nCoV-2019_17_LEFT nCoV-2019_17_RIGHT
nCoV-2019_18_LEFT nCoV-2019_18_RIGHT
nCoV-2019_18_LEFT_alt2 nCoV-2019_18_RIGHT_alt1
nCoV-2019_19_LEFT nCoV-2019_19_RIGHT
nCoV-2019_20_LEFT nCoV-2019_20_RIGHT
nCoV-2019_21_LEFT nCoV-2019_21_RIGHT
nCoV-2019_21_LEFT_alt2 nCoV-2019_21_RIGHT_alt0
nCoV-2019_22_LEFT nCoV-2019_22_RIGHT
nCoV-2019_23_LEFT nCoV-2019_23_RIGHT
nCoV-2019_24_LEFT nCoV-2019_24_RIGHT
nCoV-2019_25_LEFT nCoV-2019_25_RIGHT
nCoV-2019_26_LEFT nCoV-2019_26_RIGHT
nCoV-2019_27_LEFT nCoV-2019_27_RIGHT
nCoV-2019_28_LEFT nCoV-2019_28_RIGHT
nCoV-2019_29_LEFT nCoV-2019_29_RIGHT
nCoV-2019_30_LEFT nCoV-2019_30_RIGHT
nCoV-2019_31_LEFT nCoV-2019_31_RIGHT
nCoV-2019_32_LEFT nCoV-2019_32_RIGHT
nCoV-2019_33_LEFT nCoV-2019_33_RIGHT
nCoV-2019_34_LEFT nCoV-2019_34_RIGHT
nCoV-2019_35_LEFT nCoV-2019_35_RIGHT
nCoV-2019_36_LEFT nCoV-2019_36_RIGHT
nCoV-2019_37_LEFT nCoV-2019_37_RIGHT
nCoV-2019_38_LEFT nCoV-2019_38_RIGHT
nCoV-2019_39_LEFT nCoV-2019_39_RIGHT
nCoV-2019_40_LEFT nCoV-2019_40_RIGHT
nCoV-2019_41_LEFT nCoV-2019_41_RIGHT
nCoV-2019_42_LEFT nCoV-2019_42_RIGHT
nCoV-2019_43_LEFT nCoV-2019_43_RIGHT
nCoV-2019_44_LEFT nCoV-2019_44_RIGHT
nCoV-2019_44_LEFT_alt3 nCoV-2019_44_RIGHT_alt0
nCoV-2019_45_LEFT nCoV-2019_45_RIGHT
nCoV-2019_45_LEFT_alt2 nCoV-2019_45_RIGHT_alt7
nCoV-2019_46_LEFT nCoV-2019_46_RIGHT
nCoV-2019_46_LEFT_alt1 nCoV-2019_46_RIGHT_alt2
nCoV-2019_47_LEFT nCoV-2019_47_RIGHT
nCoV-2019_48_LEFT nCoV-2019_48_RIGHT
nCoV-2019_49_LEFT nCoV-2019_49_RIGHT
nCoV-2019_50_LEFT nCoV-2019_50_RIGHT
nCoV-2019_51_LEFT nCoV-2019_51_RIGHT
nCoV-2019_52_LEFT nCoV-2019_52_RIGHT
nCoV-2019_53_LEFT nCoV-2019_53_RIGHT
nCoV-2019_54_LEFT nCoV-2019_54_RIGHT
nCoV-2019_55_LEFT nCoV-2019_55_RIGHT
nCoV-2019_56_LEFT nCoV-2019_56_RIGHT
nCoV-2019_57_LEFT nCoV-2019_57_RIGHT
nCoV-2019_58_LEFT nCoV-2019_58_RIGHT
nCoV-2019_59_LEFT nCoV-2019_59_RIGHT
nCoV-2019_60_LEFT nCoV-2019_60_RIGHT
nCoV-2019_61_LEFT nCoV-2019_61_RIGHT
nCoV-2019_62_LEFT nCoV-2019_62_RIGHT
nCoV-2019_63_LEFT nCoV-2019_63_RIGHT
nCoV-2019_64_LEFT nCoV-2019_64_RIGHT
nCoV-2019_65_LEFT nCoV-2019_65_RIGHT
nCoV-2019_66_LEFT nCoV-2019_66_RIGHT
nCoV-2019_67_LEFT nCoV-2019_67_RIGHT
nCoV-2019_68_LEFT nCoV-2019_68_RIGHT
nCoV-2019_69_LEFT nCoV-2019_69_RIGHT
nCoV-2019_70_LEFT nCoV-2019_70_RIGHT
nCoV-2019_71_LEFT nCoV-2019_71_RIGHT
nCoV-2019_72_LEFT nCoV-2019_72_RIGHT
nCoV-2019_73_LEFT nCoV-2019_73_RIGHT
nCoV-2019_74_LEFT nCoV-2019_74_RIGHT
nCoV-2019_75_LEFT nCoV-2019_75_RIGHT
nCoV-2019_76_LEFT nCoV-2019_76_RIGHT
nCoV-2019_76_LEFT_alt3 nCoV-2019_76_RIGHT_alt0
nCoV-2019_77_LEFT nCoV-2019_77_RIGHT
nCoV-2019_78_LEFT nCoV-2019_78_RIGHT
nCoV-2019_79_LEFT nCoV-2019_79_RIGHT
nCoV-2019_80_LEFT nCoV-2019_80_RIGHT
nCoV-2019_81_LEFT nCoV-2019_81_RIGHT
nCoV-2019_82_LEFT nCoV-2019_82_RIGHT
nCoV-2019_83_LEFT nCoV-2019_83_RIGHT
nCoV-2019_84_LEFT nCoV-2019_84_RIGHT
nCoV-2019_85_LEFT nCoV-2019_85_RIGHT
nCoV-2019_86_LEFT nCoV-2019_86_RIGHT
nCoV-2019_87_LEFT nCoV-2019_87_RIGHT
nCoV-2019_88_LEFT nCoV-2019_88_RIGHT
nCoV-2019_89_LEFT nCoV-2019_89_RIGHT
nCoV-2019_89_LEFT_alt2 nCoV-2019_89_RIGHT_alt4
nCoV-2019_90_LEFT nCoV-2019_90_RIGHT
nCoV-2019_91_LEFT nCoV-2019_91_RIGHT
nCoV-2019_92_LEFT nCoV-2019_92_RIGHT
nCoV-2019_93_LEFT nCoV-2019_93_RIGHT
nCoV-2019_94_LEFT nCoV-2019_94_RIGHT
nCoV-2019_95_LEFT nCoV-2019_95_RIGHT
nCoV-2019_96_LEFT nCoV-2019_96_RIGHT
nCoV-2019_97_LEFT nCoV-2019_97_RIGHT
nCoV-2019_98_LEFT nCoV-2019_98_RIGHT
Binary file added data/test_amplicon.sorted.bam
Binary file not shown.
Binary file modified data/test_isize.sorted.bam
Binary file not shown.
2 changes: 2 additions & 0 deletions docs/MANUAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ Usage: ivar trim -i <input.bam> -b <primers.bed> -p <prefix> [-m <min-length>] [
Input Options Description
-i (Required) Sorted bam file, with aligned reads, to trim primers and quality
-b (Required) BED file with primer sequences and positions
-f Primer pair information file containing left and right primer names for the same amplicon separated by a tab
If provided, reads will be filtered based on their overlap with amplicons prior to trimming
-m Minimum length of read to retain after trimming (Default: 30)
-q Minimum quality threshold for sliding window to pass (Default: 20)
-s Width of sliding window (Default: 4)
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile.am
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
LIBS = -lhts -lz -lpthread

CXXFLAGS = -g -std=c++11 -Wall -Wextra -Werror
CXXFLAGS = -v -g -std=c++11 -Wall -Wextra -Werror

# this lists the binaries to produce, the (non-PHONY, binary) targets in
# the previous manual Makefile
bin_PROGRAMS = ivar
ivar_SOURCES = ivar.cpp call_consensus_pileup.cpp alignment.cpp suffix_tree.cpp trim_primer_quality.cpp remove_reads_from_amplicon.cpp call_variants.cpp primer_bed.cpp allele_functions.cpp get_masked_amplicons.cpp get_common_variants.cpp parse_gff.cpp ref_seq.cpp
ivar_SOURCES = ivar.cpp call_consensus_pileup.cpp alignment.cpp suffix_tree.cpp trim_primer_quality.cpp remove_reads_from_amplicon.cpp call_variants.cpp primer_bed.cpp allele_functions.cpp get_masked_amplicons.cpp get_common_variants.cpp parse_gff.cpp ref_seq.cpp interval_tree.cpp
ivar_LDADD = $(LIBS)
148 changes: 148 additions & 0 deletions src/interval_tree.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#include "interval_tree.h"

// Constructor for initializing an Interval Tree
IntervalTree::IntervalTree(){
_root = NULL;
}

// A utility function to insert a new Interval Search Tree Node
// This is similar to BST Insert. Here the low value of interval
// is used tomaintain BST property
void IntervalTree::insert(ITNode *root, Interval data){
// Base case: Tree is empty, new node becomes root
if(root == NULL){
root = new ITNode(data);
_root = root;
} else {
// Get low value of interval at root
int l = root->data->low;
// If root's low value is greater, then new interval goes to
// left subtree
if (data.low < l){
if(!root->left){
ITNode *tmpNode = new ITNode(data);
//std::cout << data.low << ":" << data.high << "->insertLeft" << std::endl;
root->left = tmpNode;
} else {
insert(root->left, data);
}
}
else {
if(!root->right){
ITNode *tmpNode = new ITNode(data);
//std::cout << data.low << ":" << data.high << "->insertRight" << std::endl;
root->right = tmpNode;
} else {
insert(root->right, data);
}
}
}
// update max value of ancestor node
if(root->max < data.low)
root->max = data.low;
}


// A utility function to check if given two intervals overlap
bool doOverlap(Interval i1, Interval i2){
if(i1.low <= i2.low && i1.high >= i2.high)
return true;
return false;
}


// The main function that searches an interval i in a given
// Interval Tree.
bool IntervalTree::overlapSearch(ITNode *root, Interval i){
// Base Case, tree is empty
//std::cout << root->data->low << ":" << root->data->high << std::endl;
if (root == NULL) return false;

// If given interval overlaps with root
if (doOverlap(*(root->data), i))
return true;

// If left child of root is present and max of left child is
// greater than or equal to given interval, then i may
// overlap with an interval in left subtree
if (root->left != NULL && root->left->max >= i.low)
return overlapSearch(root->left, i);

// Else interval can only overlap with right subtree
return overlapSearch(root->right, i);
}

// A helper function for inorder traversal of the tree
void IntervalTree::inOrder(ITNode *root){
if (root == NULL) return;
inOrder(root->left);
cout << "[" << root->data->low << ", " << root->data->high << "]"
<< " max = " << root->max << endl;
inOrder(root->right);
}

// A stand-alone function to create a tree containing the coordinates of each amplicon
// based on user-specified primer pairs
IntervalTree populate_amplicons(std::string pair_info_file, std::vector<primer> primers){
int amplicon_start = -1;
int amplicon_end = -1;
IntervalTree tree = IntervalTree();
populate_pair_indices(primers, pair_info_file);
for (auto & p : primers) {
if (p.get_strand() == '+')
{
if (p.get_pair_indice() != -1){
amplicon_start = p.get_start();
amplicon_end = primers[p.get_pair_indice()].get_end() + 1;
tree.insert(Interval(amplicon_start, amplicon_end));
}
}
}
return tree;
}


/*
// Simple access functions to retrieve node's interval data
Interval ITNode::getData()const{
return data;
}
// Simple access functions to retrieve node's left child
ITNode ITNode::getLeft()const{
return left;
}
// Simple access functions to retrieve node's right child
ITNode ITNode::getRight()const{
return right;
}
// Simple access functions to set node's left child
void ITNode::setLeft(ITNode *node){
left = node;
}
// Simple access functions to set node's right child
void ITNode::setRight(ITNode *node){
right = node;
}
int main()
{
Interval ints[6] = {Interval(15, 20), Interval(30, 10), Interval(17, 19), Interval(5, 20), Interval(12, 15), Interval(30, 40)};
int n = sizeof(ints) / sizeof(ints[0]);
IntervalTree tree = IntervalTree();
cout << "Hello World" << endl;
// populate interval tree
for (int i = 0; i < n; i++)
{
tree.insert(ints[i]);
}
tree.inOrder();
Interval queries[4] = {Interval(15, 20), Interval(9, 30), Interval(31, 38), Interval(7, 22)};
int num_tests = sizeof(queries) / sizeof(queries[0]);
for (int i = 0; i < num_tests; i++)
{
cout << "Does " << queries[i].low << ":" << queries[i].high << " Overlap? " << tree.overlapSearch(queries[i]) << endl;
}
return 0;
}
*/
53 changes: 53 additions & 0 deletions src/interval_tree.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include <iostream>
#include "primer_bed.h"
using namespace std;

#ifndef interval_tree
#define interval_tree

// Structure to represent an interval
class Interval{ public:
Interval(int val1, int val2): low(std::min(val1, val2)), high(std::max(val1, val2)) {} // constructor
int low, high;
};
// Structure to represent a node in Interval Search Tree
class ITNode{
/*
public:
ITNode(Interval *values): data(value), left(nullptr), right(nullptr) {} // constructor
int max;
// Getters - access member functions
Interval getData()const;
ITNode getLeft()const;
ITNode getRight()const;
// Setters - access member functions
void setLeft(ITNode *node);
void setRight(ITNode *node);
*/
public:
ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.low) {} // constructor
Interval *data; // pointer to node's interval data object
ITNode *left, *right; // pointer to node's left & right child node objects
int max;

};

/////////////////////////////////////////////////////////////////////////////////////////
// IntervalTree class
class IntervalTree{
private:
ITNode *_root;
void insert(ITNode *root, Interval data);
bool overlapSearch(ITNode *root, Interval data);
void inOrder(ITNode * root);

public:
IntervalTree(); // constructor
void insert(Interval data){ insert(_root, data);}
bool overlapSearch(Interval data){ return overlapSearch(_root, data);}
void inOrder() {inOrder(_root);}
};

IntervalTree populate_amplicons(std::string pair_info_file, std::vector<primer> primers);

#endif
Loading

0 comments on commit 98c2394

Please sign in to comment.