Skip to content

Commit

Permalink
Handle when seq is both ref and query
Browse files Browse the repository at this point in the history
  • Loading branch information
mooreryan committed May 16, 2017
1 parent 2bf0a3a commit 4cb98fe
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 13 deletions.
34 changes: 21 additions & 13 deletions src/pasv.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,29 @@ get_aln_posns(char* aln_outfile,
seq_header);

if (tmp_rseq->first_ref_seq == 1) {
assert(tmp_rseq->query_seq != 1);
assert(first_ref_seq_found == 0);
first_ref_seq_found = 1;
++first_ref_seq_found;
PANIC_IF(first_ref_seq_found > 1,
STD_ERR,
stderr,
"Found more than one key ref seq (%s) in file '%s'\n",
seq->name.s,
aln_outfile);

first_ref_seq = rseq_init(seq);
} else if (tmp_rseq->query_seq == 1) {
assert(query_seq_found == 0);

query_seq_found = 1;
query_seq = rseq_init(seq);
}

if (first_ref_seq_found == 1 && query_seq_found == 1) {
free(seq_header);
break;
if (tmp_rseq->query_seq == 1) {
++query_seq_found;

PANIC_IF((query_seq_found > 1 && first_ref_seq_found == 0) ||
(query_seq_found > 2 && first_ref_seq_found == 1),
STD_ERR,
stderr,
"Found more than one query seq (%s) in file '%s'\n",
seq->name.s,
aln_outfile);

query_seq = rseq_init(seq);
}

free(seq_header);
Expand Down Expand Up @@ -224,7 +232,7 @@ main(int argc, char *argv[])
char* query_fname = NULL;

static char version_banner[] =
" Version: 0.0.3\n"
" Version: 0.0.4\n"
" Copyright: 2017 Ryan Moore\n"
" Contact: [email protected]\n"
" Website: https://github.com/mooreryan/pasv\n"
Expand Down Expand Up @@ -598,7 +606,7 @@ main(int argc, char *argv[])

fprintf(outfs, "name\ttype\tspans\toligo");
for (int n = 0; n < num_key_posns; ++n) {
fprintf(outfs, "\tpos.%d", key_posns[n]);
fprintf(outfs, "\tpos.%d", key_posns[n] + 1);
}
fprintf(outfs, "\n");
int num_ref_seqs = tommy_array_size(ref_seqs);
Expand Down
14 changes: 14 additions & 0 deletions src/rseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,20 @@ rseq_try_insert_hashlin(rseq_t* rseq, tommy_hashlin* hash)
"Header '%s' is repeated, but the sequences it "
"represents are not equal.",
rseq->head);

/* A seq can appear in both the ref and query files. When that
happens, the same record may be the first ref seq AND a query
seq. */
/* Merge the flags: if the incoming rseq has first_ref_seq or
query_seq set but the already-stored tmp seq does not, set that
flag on tmp as well. Flags already set on tmp are left alone. */
if (rseq->first_ref_seq == 1 && tmp->first_ref_seq == 0) {
tmp->first_ref_seq = 1; /* set it to match the incoming rseq */
}

if (rseq->query_seq == 1 && tmp->query_seq == 0) {
tmp->query_seq = 1;
}

} else {
tommy_hashlin_insert(hash,
&rseq->node,
Expand Down
6 changes: 6 additions & 0 deletions test_files/queries.fa
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,9 @@
MIVSDIEANALLESVTKFHCGVIYDYSTAEYVSYRPSDFGAYLDALEAEVARGGLIVFHNGHKYDVPALTKLAKLQLNREFHLPRENCIDTLVLSRLIHSNLKDTDMGLLRSGKLPGKRFGSHALEAWGYRLGEMKGEYKDDFKRMLEEQGEEYVDGMEWWNFNEEMMDYNVQDVVVTKALLEKLLSDKHYFPPEIDFTDVGYTTFWSESLEAVDIEHRAAWLLAKQERNGFPFDTKAIEELYVELAARRSELLRKLTETFGSWYQPKGGTEMFCHPRTGKPLPKYPRIKTPKVGGIFKKPKNKAQREGREPCELDTREYVAGAPYTPVEHVVFNPSSRDHIQKKLQEAGWVPTKYTDKGAPVVDDEVLEGVRVDDPEKQAAIDLIKEYLMIQKRIGQSAEGDKAWLRYVAEDGKIHGSVNPNGAVTGRATHAFPNLAQIPGVRSPYGEQCRAAFGAEHHLDGITGKPWVQAGIDASGLELRCLAHFMARFDNGEYAHEILNGDIHTKNQIAAELPTRDNAKTFIYGFLYGAGDEKIGQIVGAGKERGKELKKKFLENTPAIAALRESIQQTLVESSQWVAGEQQVKWKRRWIKGLDGRKVHVRSPHAALNTLLQSAGALICKLWIIKTEEMLVEKGLKHGWDGDFAYMAWVHDEIQVGCRTEEIAQVVIETAQEAMRWVGDHWNFRCLLDTEGKMGPNWAICH
>2 query
MSNNRPLLLLIDGHALAYRAFHALAEAGLRSSTGEPTYAVFGFTSAMLNAIEEYHPDYAAVAFDVGKTFRDDLYAEYKANRAETPAEFEQQLERIKQVLAAFDIPIYTADGYEADDVIGTLARQATERGVDVLILTGDTDTLQLVDEHVTVLLNNPYVRGSKNTTRYGVADVCARYKGLRPDQLADLRGLKGDPSDNIPGVKGIGEAGAIALLNQFGSIENLYDHLDEAPKRYQKHLEGQRDAALFSKKLATIVRDAPVTLDLPAATLADYDRSRVIAVFQELEFGASLVRRLPPSQTIAAPQALPPVEPPAPLQVDMFAPATPGPDDGPQQLTLFNDMPTPVAPVVEPPAHDAPGEYRAACNDADLEAIVTELKHASLFAFDTETRGTNPLRDDLVGIALATIPGSGWYVPLGHTTGEAQLPRERVIAALRPFFADPARSRIAHNAKFDIEVLERAGIPVAGVAFDTMLAAALLDKRRNLKDLAFYELNLAAPLESIEALIGKGKNQVTFADVPIARATPYAAADADMTLRLKPALEAKLRAAGSVADVFYRLEMPLVPVLVRMEQAGILLDVPYMRALGERMGRELEQIEQQIYAIAGQTFNINSGDQLSEVLFGPKINLPTTGLDRTRTGRYSLTAQALEELQASDTTGIIELILRHRRLSKLKSTYVDELPALVNPETGRVHTDYNQLGAATGRLSSNSPNLQNIPTRTEEGREVRRGFIAAPGHLLIAADYSQIELRVLAHMTGDPNLIQTFIEGRDIHAATAARLFGVGFSAVDKNQRRIAKTVVFGVIYGISPFGLAQRLGISREQARGLIDSLFDQFPRIRDYIDRTLDIGRSEGYVQSLFGRRRPMFDLRVSGPRRQAAEREAINHPIQSTAADIMKLAMIAVDAELQRRQMRTRMLLQVHDELIFEAPEAEVDDVVALVRERMEGVLHGMEPPFAVPLRVEIETGPNWEELTPAG
>1_e_coli ref
MVQIPQNPLILVDGSSYLYRAYHAFPPLTNSAGEPTGAMYGVLNMLRSLIMQYKPTHAAVVFDAKGKTFRDELFEHYKSHRPPMPDDLRAQIEPLHAMVKAMGLPLLAVSGVEADDVIGTLAREAEKAGRPVLISTGDKDMAQLVTPNITLINTMTNTILGPEEVVNKYGVPPELIIDFLALMGDSSDNIPGVPGVGEKTAQALLQGLGGLDTLYAEPEKIAGLSFRGAKTMAAKLEQNKEVAYLSYQLATIKTDVELELTCEQLEVQPPAAEELLGLFKKYEFKRWTADVEAGKWLQAKGVKPAARPQETSVADEAPEVTATVISYDNYVTILDEETLKEWIAKLEKAPVFAFDTETDSLDNISANLVGLSFAIEPGVAAYIPVAHDYLDAPDQISRERALELLKPLLEDEKALKVGQNLKYDRGILANYGIELRGIAFDTMLESYILNSVAGRHDMDSLAERWLKHKTITFEEIAGKGKNQLTFNQIALEEAGRYAAEDADVTLQLHLKMWPDLQKHKGPLNVFENIEMPLVPVLSRIERNGVKIDPKVLHNHSEELTLRLAELEKKAHEIAGEEFNLSSTKQLQTILFEKQGIKPLKKTPGGAPSTSEEVLEELALDYPLPKVILEYRGLAKLKSTYTDKLPLMINPKTGRVHTSYHQAVTATGRLSSTDPNLQNIPVRNEEGRRIRQAFIAPEDYVIVSADYSQIELRIMAHLSRDKGLLTAFAEGKDIHRATAAEVFGLPLETVTSEQRRSAKAINFGLIYGMSAFGLARQLNIPRKEAQKYMDLYFERYPGVLEYMERTRAQAKEQGYVETLDGRRLYLPDIKSSNGARRAAAERAAINAPMQGTAADIIKRAMIAVDAWLQAEQPRVRMIMQVHDELVFEVHKDDVDAVAKQIHQLMENCTRLDVPLLVEVGSGENWDQAH
>2_phage ref
MLYPWENMYASDIETTGLLEQMRKQAAPRLHNIGYIDVLTREETVIEWTDRKSIQAFLDTGPTLIMHNGATFDFEALRFLGYDVSKCTLIDTLFISWYLQPRRVKHGLEGYGEEFGVPKPVIEDWENQTQEEYNHRVMEDCKIQLKLWEQQYIQLLKIYKSPSEVKRFVEYLMTKARQQVIQQRTRWKLNIEKALAFKAKLEPMIKEKTDALEASMPRIPEYVIKTRPAKCHKMNGQLSATGIKWKAVCDANGLDWKDPDLAIKVLKGYKEPNAGSHVQIKDWLFSLGWEPETFKFDRNKETGETRQIPQITVKDEDGNPEICPSLHKLAERNPESGIQHLIGMGVYKHRLSVVNGFLRDVDEDGYLTARCGGLTNTLRLKHRELVNLPSIRVFGGEELRSMLEAWREDYEQLGSDLCSLEDRCKHHFQWMYDPEYVKKQLAPDYDAHLAIGVIGGFITEQESQDHKDGIKKCKQRPMFKTTNYACQYGAGVPTVARSAGCDQTTAARLHKAYWDLNWSIKEIAANTKVITVDGQMWQQNPVNKFWYSLRTEKDRFSTLCQGTGAYVFDIWCNNIIAICNERWGCDPLLSGQFHDELILQVKKGFRDLWTDLLNEAMDRTNKELKLNRDCACDVQFGDNYAEIH
>1 query
MIVSDIEANALLESVTKFHCGVIYDYSTAEYVSYRPSDFGAYLDALEAEVARGGLIVFHNGHKYDVPALTKLAKLQLNREFHLPRENCIDTLVLSRLIHSNLKDTDMGLLRSGKLPGKRFGSHALEAWGYRLGEMKGEYKDDFKRMLEEQGEEYVDGMEWWNFNEEMMDYNVQDVVVTKALLEKLLSDKHYFPPEIDFTDVGYTTFWSESLEAVDIEHRAAWLLAKQERNGFPFDTKAIEELYVELAARRSELLRKLTETFGSWYQPKGGTEMFCHPRTGKPLPKYPRIKTPKVGGIFKKPKNKAQREGREPCELDTREYVAGAPYTPVEHVVFNPSSRDHIQKKLQEAGWVPTKYTDKGAPVVDDEVLEGVRVDDPEKQAAIDLIKEYLMIQKRIGQSAEGDKAWLRYVAEDGKIHGSVNPNGAVTGRATHAFPNLAQIPGVRSPYGEQCRAAFGAEHHLDGITGKPWVQAGIDASGLELRCLAHFMARFDNGEYAHEILNGDIHTKNQIAAELPTRDNAKTFIYGFLYGAGDEKIGQIVGAGKERGKELKKKFLENTPAIAALRESIQQTLVESSQWVAGEQQVKWKRRWIKGLDGRKVHVRSPHAALNTLLQSAGALICKLWIIKTEEMLVEKGLKHGWDGDFAYMAWVHDEIQVGCRTEEIAQVVIETAQEAMRWVGDHWNFRCLLDTEGKMGPNWAICH
4 changes: 4 additions & 0 deletions test_files/refs.fa
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
MVQIPQNPLILVDGSSYLYRAYHAFPPLTNSAGEPTGAMYGVLNMLRSLIMQYKPTHAAVVFDAKGKTFRDELFEHYKSHRPPMPDDLRAQIEPLHAMVKAMGLPLLAVSGVEADDVIGTLAREAEKAGRPVLISTGDKDMAQLVTPNITLINTMTNTILGPEEVVNKYGVPPELIIDFLALMGDSSDNIPGVPGVGEKTAQALLQGLGGLDTLYAEPEKIAGLSFRGAKTMAAKLEQNKEVAYLSYQLATIKTDVELELTCEQLEVQPPAAEELLGLFKKYEFKRWTADVEAGKWLQAKGVKPAARPQETSVADEAPEVTATVISYDNYVTILDEETLKEWIAKLEKAPVFAFDTETDSLDNISANLVGLSFAIEPGVAAYIPVAHDYLDAPDQISRERALELLKPLLEDEKALKVGQNLKYDRGILANYGIELRGIAFDTMLESYILNSVAGRHDMDSLAERWLKHKTITFEEIAGKGKNQLTFNQIALEEAGRYAAEDADVTLQLHLKMWPDLQKHKGPLNVFENIEMPLVPVLSRIERNGVKIDPKVLHNHSEELTLRLAELEKKAHEIAGEEFNLSSTKQLQTILFEKQGIKPLKKTPGGAPSTSEEVLEELALDYPLPKVILEYRGLAKLKSTYTDKLPLMINPKTGRVHTSYHQAVTATGRLSSTDPNLQNIPVRNEEGRRIRQAFIAPEDYVIVSADYSQIELRIMAHLSRDKGLLTAFAEGKDIHRATAAEVFGLPLETVTSEQRRSAKAINFGLIYGMSAFGLARQLNIPRKEAQKYMDLYFERYPGVLEYMERTRAQAKEQGYVETLDGRRLYLPDIKSSNGARRAAAERAAINAPMQGTAADIIKRAMIAVDAWLQAEQPRVRMIMQVHDELVFEVHKDDVDAVAKQIHQLMENCTRLDVPLLVEVGSGENWDQAH
>2_phage ref
MLYPWENMYASDIETTGLLEQMRKQAAPRLHNIGYIDVLTREETVIEWTDRKSIQAFLDTGPTLIMHNGATFDFEALRFLGYDVSKCTLIDTLFISWYLQPRRVKHGLEGYGEEFGVPKPVIEDWENQTQEEYNHRVMEDCKIQLKLWEQQYIQLLKIYKSPSEVKRFVEYLMTKARQQVIQQRTRWKLNIEKALAFKAKLEPMIKEKTDALEASMPRIPEYVIKTRPAKCHKMNGQLSATGIKWKAVCDANGLDWKDPDLAIKVLKGYKEPNAGSHVQIKDWLFSLGWEPETFKFDRNKETGETRQIPQITVKDEDGNPEICPSLHKLAERNPESGIQHLIGMGVYKHRLSVVNGFLRDVDEDGYLTARCGGLTNTLRLKHRELVNLPSIRVFGGEELRSMLEAWREDYEQLGSDLCSLEDRCKHHFQWMYDPEYVKKQLAPDYDAHLAIGVIGGFITEQESQDHKDGIKKCKQRPMFKTTNYACQYGAGVPTVARSAGCDQTTAARLHKAYWDLNWSIKEIAANTKVITVDGQMWQQNPVNKFWYSLRTEKDRFSTLCQGTGAYVFDIWCNNIIAICNERWGCDPLLSGQFHDELILQVKKGFRDLWTDLLNEAMDRTNKELKLNRDCACDVQFGDNYAEIH
>1_e_coli ref
MVQIPQNPLILVDGSSYLYRAYHAFPPLTNSAGEPTGAMYGVLNMLRSLIMQYKPTHAAVVFDAKGKTFRDELFEHYKSHRPPMPDDLRAQIEPLHAMVKAMGLPLLAVSGVEADDVIGTLAREAEKAGRPVLISTGDKDMAQLVTPNITLINTMTNTILGPEEVVNKYGVPPELIIDFLALMGDSSDNIPGVPGVGEKTAQALLQGLGGLDTLYAEPEKIAGLSFRGAKTMAAKLEQNKEVAYLSYQLATIKTDVELELTCEQLEVQPPAAEELLGLFKKYEFKRWTADVEAGKWLQAKGVKPAARPQETSVADEAPEVTATVISYDNYVTILDEETLKEWIAKLEKAPVFAFDTETDSLDNISANLVGLSFAIEPGVAAYIPVAHDYLDAPDQISRERALELLKPLLEDEKALKVGQNLKYDRGILANYGIELRGIAFDTMLESYILNSVAGRHDMDSLAERWLKHKTITFEEIAGKGKNQLTFNQIALEEAGRYAAEDADVTLQLHLKMWPDLQKHKGPLNVFENIEMPLVPVLSRIERNGVKIDPKVLHNHSEELTLRLAELEKKAHEIAGEEFNLSSTKQLQTILFEKQGIKPLKKTPGGAPSTSEEVLEELALDYPLPKVILEYRGLAKLKSTYTDKLPLMINPKTGRVHTSYHQAVTATGRLSSTDPNLQNIPVRNEEGRRIRQAFIAPEDYVIVSADYSQIELRIMAHLSRDKGLLTAFAEGKDIHRATAAEVFGLPLETVTSEQRRSAKAINFGLIYGMSAFGLARQLNIPRKEAQKYMDLYFERYPGVLEYMERTRAQAKEQGYVETLDGRRLYLPDIKSSNGARRAAAERAAINAPMQGTAADIIKRAMIAVDAWLQAEQPRVRMIMQVHDELVFEVHKDDVDAVAKQIHQLMENCTRLDVPLLVEVGSGENWDQAH
>2_phage ref
MLYPWENMYASDIETTGLLEQMRKQAAPRLHNIGYIDVLTREETVIEWTDRKSIQAFLDTGPTLIMHNGATFDFEALRFLGYDVSKCTLIDTLFISWYLQPRRVKHGLEGYGEEFGVPKPVIEDWENQTQEEYNHRVMEDCKIQLKLWEQQYIQLLKIYKSPSEVKRFVEYLMTKARQQVIQQRTRWKLNIEKALAFKAKLEPMIKEKTDALEASMPRIPEYVIKTRPAKCHKMNGQLSATGIKWKAVCDANGLDWKDPDLAIKVLKGYKEPNAGSHVQIKDWLFSLGWEPETFKFDRNKETGETRQIPQITVKDEDGNPEICPSLHKLAERNPESGIQHLIGMGVYKHRLSVVNGFLRDVDEDGYLTARCGGLTNTLRLKHRELVNLPSIRVFGGEELRSMLEAWREDYEQLGSDLCSLEDRCKHHFQWMYDPEYVKKQLAPDYDAHLAIGVIGGFITEQESQDHKDGIKKCKQRPMFKTTNYACQYGAGVPTVARSAGCDQTTAARLHKAYWDLNWSIKEIAANTKVITVDGQMWQQNPVNKFWYSLRTEKDRFSTLCQGTGAYVFDIWCNNIIAICNERWGCDPLLSGQFHDELILQVKKGFRDLWTDLLNEAMDRTNKELKLNRDCACDVQFGDNYAEIH

0 comments on commit 4cb98fe

Please sign in to comment.