Skip to content

Commit

Permalink
Merge pull request #88 from andersen-lab/swift_fix
Browse files Browse the repository at this point in the history
added optional -x flag for ivar trim. allows trimming of reads that occur at a specified offset position relative to the primer sequence positions
  • Loading branch information
gkarthik authored Feb 5, 2021
2 parents 0889a14 + 60e8e6b commit ce1490a
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 14 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.63])
AC_INIT([ivar], [1.3], [[email protected]])
AC_INIT([ivar], [1.3.1], [[email protected]])
AM_INIT_AUTOMAKE([foreign subdir-objects])
AC_CONFIG_HEADERS([config.h])

Expand Down
12 changes: 9 additions & 3 deletions src/ivar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "suffix_tree.h"
#include "get_common_variants.h"

const std::string VERSION = "1.3";
const std::string VERSION = "1.3.1";

struct args_t {
std::string bam; // -i
Expand All @@ -37,6 +37,7 @@ struct args_t {
char gap; // -n
bool keep_min_coverage; // -k
std::string primer_pair_file; // -f
int32_t primer_offset; // -x
std::string file_list; // -f
bool write_no_primers_flag; // -e
std::string gff; // -g
Expand Down Expand Up @@ -67,6 +68,7 @@ void print_trim_usage(){
" -b BED file with primer sequences and positions. If no BED file is specified, only quality trimming will be done.\n"
" -f [EXPERIMENTAL] Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n"
" If provided, reads that do not fall within atleat one amplicon will be ignored prior to primer trimming.\n"
" -x Primer position offset (Default: 0). Reads that occur at the specified offset positions relative to primer positions will also be trimmed.\n"
" -m Minimum length of read to retain after trimming (Default: 30)\n"
" -q Minimum quality threshold for sliding window to pass (Default: 20)\n"
" -s Width of sliding window (Default: 4)\n"
Expand Down Expand Up @@ -165,7 +167,7 @@ void print_version_info(){
"\nPlease raise issues and bug reports at https://github.com/andersen-lab/ivar/\n\n";
}

static const char *trim_opt_str = "i:b:f:p:m:q:s:ekh?";
static const char *trim_opt_str = "i:b:f:x:p:m:q:s:ekh?";
static const char *variants_opt_str = "p:t:q:m:r:g:h?";
static const char *consensus_opt_str = "i:p:q:t:m:n:kh?";
static const char *removereads_opt_str = "i:p:t:b:h?";
Expand Down Expand Up @@ -221,6 +223,7 @@ int main(int argc, char* argv[]){
g_args.keep_for_reanalysis = false;
g_args.bed = "";
g_args.primer_pair_file = "";
g_args.primer_offset = 0;
opt = getopt( argc, argv, trim_opt_str);
while( opt != -1 ) {
switch( opt ) {
Expand All @@ -232,6 +235,9 @@ int main(int argc, char* argv[]){
break;
case 'f':
g_args.primer_pair_file = optarg;
break;
case 'x':
g_args.primer_offset = std::stoi(optarg);
break;
case 'p':
g_args.prefix = optarg;
Expand Down Expand Up @@ -264,7 +270,7 @@ int main(int argc, char* argv[]){
return -1;
}
g_args.prefix = get_filename_without_extension(g_args.prefix,".bam");
res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file);
res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file, g_args.primer_offset);
}
// ivar variants
else if (cmd.compare("variants") == 0){
Expand Down
71 changes: 70 additions & 1 deletion src/primer_bed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,75 @@ void print_bed_format(){
std::cout << "It requires the following columns delimited by a tab: chrom, chromStart, chromEnd, name, score, strand" << std::endl;
}

std::vector<primer> populate_from_file(std::string path, int32_t offset = 0){
std::ifstream data(path.c_str());
std::string line;
std::vector<primer> primers;
int16_t indice = 0;
while(std::getline(data,line)){ // Remove extra lineStream
std::stringstream lineStream(line);
std::string cell;
int ctr = 0;
primer p;
p.set_strand(0); // Set strand to NULL
while(std::getline(lineStream,cell,'\t')){
switch(ctr){
case 0:
p.set_region(cell);
break;
case 1:
if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
p.set_start(std::stoul(cell) - offset);
} else {
print_bed_format();
primers.clear();
return primers;
}
break;
case 2:
if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
p.set_end(std::stoul(cell) - 1 + offset); // Bed format - End is not 0 based
} else {
print_bed_format();
primers.clear();
return primers;
}
break;
case 3:
p.set_name(cell);
break;
case 4:
if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
p.set_score(stoi(cell));
} else {
print_bed_format(); // score is missing, send warning but continue populating
std::cout << "\nWARNING: The BED file provided did not have the expected score column, but iVar will continue trimming\n" << std::endl;
p.set_score(-1);
}
break;
case 5:
if(cell[0] == '+' || cell[0] == '-')
p.set_strand(cell[0]);
else {
print_bed_format();
primers.clear();
return primers;
}
}
ctr++;
}
if(indice == 0 && ctr < 6)
std::cout << "Strand not found in primer BED file so strand will not be considered for trimming" << std::endl;
p.set_indice(indice);
p.set_pair_indice(-1);
p.set_read_count(0);
primers.push_back(p);
indice++;
}
std::cout << "Found " << primers.size() << " primers in BED file" << std::endl;
return primers;
}

std::vector<primer> populate_from_file(std::string path){
std::ifstream data(path.c_str());
std::string line;
Expand Down Expand Up @@ -112,7 +181,7 @@ std::vector<primer> populate_from_file(std::string path){
break;
case 2:
if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
p.set_end(std::stoul(cell)-1); // Bed format - End is not 0 based
p.set_end(std::stoul(cell) - 1); // Bed format - End is not 0 based
} else {
print_bed_format();
primers.clear();
Expand Down
1 change: 1 addition & 0 deletions src/primer_bed.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class primer {

};

std::vector<primer> populate_from_file(std::string path, int32_t offset);
std::vector<primer> populate_from_file(std::string path);
std::vector<primer> get_primers(std::vector<primer> p, unsigned int pos);
int get_primer_indice(std::vector<primer> p, std::string name);
Expand Down
4 changes: 2 additions & 2 deletions src/trim_primer_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,12 @@ bool amplicon_filter(IntervalTree amplicons, bam1_t* r){
return amplicon_flag;
}

int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "") {
int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "", int32_t primer_offset = 0) {
int retval = 0;
std::vector<primer> primers;
int max_primer_len = 0;
if(!bed.empty()){
primers = populate_from_file(bed);
primers = populate_from_file(bed, primer_offset);
if(primers.size() == 0){
std::cout << "Exiting." << std::endl;
return -1;
Expand Down
2 changes: 1 addition & 1 deletion src/trim_primer_quality.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ inline void free_cigar(cigar_ t) { if (t.free_cig) free(t.cigar); }
void add_pg_line_to_header(bam_hdr_t** hdr, char *cmd);


int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info);
int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info, int32_t primer_offset);
void free_cigar(cigar_ t);
int32_t get_pos_on_query(uint32_t *cigar, uint32_t ncigar, int32_t pos, int32_t ref_start);
int32_t get_pos_on_reference(uint32_t *cigar, uint32_t ncigar, uint32_t pos, uint32_t ref_start);
Expand Down
7 changes: 4 additions & 3 deletions tests/test_amplicon_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include "../src/primer_bed.h"
#include "../src/interval_tree.h"

int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, bool expected[])
int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, int32_t primer_offset, bool expected[])
{
std::vector<primer> primers;
IntervalTree amplicons;
primers = populate_from_file(bed_file);
primers = populate_from_file(bed_file, primer_offset);
std::cout << "Total Number of primers: " << primers.size() << std::endl;
amplicons = populate_amplicons(pair_info_file, primers);
samFile *in = hts_open(bam_file.c_str(), "r");
Expand All @@ -35,13 +35,14 @@ int main(){
int success;
std::string bam = "../data/test_amplicon.sorted.bam";
std::string pair_indice = "../data/pair_info_2.tsv";
int32_t primer_offset = 0;
std::string bed = "../data/test_isize.bed";
std::string prefix = "/tmp/data/trim_amplicon";
std::string bam_out = "/tmp/trim_amplicon.bam";

IntervalTree amplicons;
bool expected[8] = {true, false, true, false, true, false, true, false};
success = test_amplicon_search(bam, bed, pair_indice, expected);
success = test_amplicon_search(bam, bed, pair_indice, primer_offset, expected);
amplicons.inOrder();
return success;
}
3 changes: 2 additions & 1 deletion tests/test_isize_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag
std::string bam = "../data/test_isize.sorted.bam";
std::string bed = "../data/test_isize.bed";
std::string pair_info = "";
int32_t primer_offset = 0;
std::string prefix = "../data/trim_isize";
std::string bam_out = "../data/trim_isize.bam";

Expand All @@ -29,7 +30,7 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag
std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n";

// Test and check result
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info);
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info, primer_offset);

if (res) {
success = -1;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_primer_trim_edge_cases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
int main(){
int success = 0;
std::string bam = "../data/primer_only/primer_edge_cases.bam";
std::vector<primer> primers = populate_from_file("../data/test.bed");
int32_t primer_offset = 0;
std::vector<primer> primers = populate_from_file("../data/test.bed", primer_offset);
int max_primer_len = 0;
max_primer_len = get_bigger_primer(primers);
std::string region_;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ int test_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool
std::string bam = "../data/test.unmapped.sorted.bam";
std::string bed = "../data/test.bed";
std::string pair_info = "";
int32_t primer_offset = 0;
std::string prefix = "/tmp/trim";
std::string bam_out = "/tmp/trim.bam";

std::string region_ = "";
std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n";

// Test and check result
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info);
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info, primer_offset);
if (res) {
success = -1;
std::cerr << testname << " failed: trim_bam_qual_primer() returned " << res << std::endl;
Expand Down

0 comments on commit ce1490a

Please sign in to comment.