Skip to content

Commit

Permalink
grep
Browse files Browse the repository at this point in the history
  • Loading branch information
telatin committed Feb 17, 2021
1 parent 5efb27a commit f808697
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.dmp
tax/*
tax
bin/*
Binary file modified bin/fu-primers
Binary file not shown.
Binary file modified bin/seqfu
Binary file not shown.
98 changes: 98 additions & 0 deletions src/fastx_grep.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import klib
import tables, strutils
from os import fileExists
import docopt
import ./seqfu_utils
import re

proc fastx_grep(argv: var seq[string]): int =
  ## `grep` subcommand: print the sequence records that satisfy every filter
  ## given on the command line.
  ##
  ## Filters (all optional, a record must pass each one that is set):
  ## * `--name`   substring that must occur in the record name;
  ## * `--regex`  pattern the record name must match;
  ## * `--oligo`  oligonucleotide searched with `findPrimerMatches`.
  ## With `--comment` the name filters are applied to "name comment".
  ##
  ## `argv` holds the subcommand arguments handed to docopt. `result` is never
  ## assigned, so the proc returns 0; it quits with status 1 when an integer
  ## option cannot be parsed.
  let args = docopt("""
Usage: grep [options] [<inputfile> ...]

Print sequences selected if they match patterns or contain oligonucleotides

Options:
  -n, --name STRING      String required in the sequence name
  -r, --regex PATTERN    Pattern to be matched in sequence name
  -c, --comment          Also search -n and -r in the comment
  -o, --oligo IUPAC      Oligonucleotide required in the sequence,
                         using ambiguous bases and reverse complement
  --max-mismatches INT   Maximum mismatches allowed [default: 0]
  --min-matches INT      Minimum number of matches [default: oligo-length]
  -v, --verbose          Verbose output
  --help                 Show this help
  """, version=version(), argv=argv)

  # NOTE(review): `verbose` is not declared in this proc; presumably a shared
  # variable from seqfu_utils -- confirm.
  verbose = args["--verbose"]

  # docopt stringifies an option that was not supplied as "nil"; resolve every
  # filter once here instead of re-reading `args` for each record.
  let
    nameQuery = $args["--name"]
    regexQuery = $args["--regex"]
    oligoQuery = $args["--oligo"]
    useName = nameQuery != "nil"
    useRegex = regexQuery != "nil"
    useOligo = oligoQuery != "nil"
    # --comment is a boolean flag: its string form is "true"/"false", never
    # "nil", so it has to be read as a bool.
    searchComment: bool = args["--comment"]

  const matchThs = 1.0

  var
    files: seq[string]
    maxMismatches = 0
    minMatches = 2
    nameRegex: Regex

  if useRegex:
    # `match` is anchored at the start of the string; the ".*" prefix lets the
    # user pattern start anywhere in the name. Compiled once, not per record.
    nameRegex = re(".*" & regexQuery)

  try:
    maxMismatches = parseInt($args["--max-mismatches"])
    if $args["--min-matches"] == "oligo-length":
      # Default: require as many matches as the oligo is long.
      if useOligo:
        minMatches = len(oligoQuery)
    else:
      minMatches = parseInt($args["--min-matches"])
  except ValueError as e:
    stderr.writeLine("Error parsing parameters: oligo matches are Integer. ", e.msg)
    quit(1)

  if args["<inputfile>"].len() == 0:
    stderr.writeLine("Waiting for STDIN... [Ctrl-C to quit, type with --help for info].")
    files.add("-")
  else:
    for file in args["<inputfile>"]:
      files.add(file)

  for filename in files:
    # "-" is the STDIN placeholder added above, not a path on disk, so it must
    # bypass the existence check.
    # NOTE(review): relies on xopen[GzFile] treating "-" as STDIN -- confirm
    # against klib.
    if filename != "-" and not fileExists(filename):
      stderr.writeLine("Skipping ", filename, ": not found")
      continue
    echoVerbose(filename, verbose)

    var
      f = xopen[GzFile](filename)
      r: FastxRecord

    # Runs when this loop iteration's scope exits, so each file is closed
    # before the next one is opened.
    defer: f.close()

    if args["--verbose"]:
      if useName:
        stderr.writeLine("Name contains: ", nameQuery)
      if useRegex:
        stderr.writeLine("Name matches: ", regexQuery)

    while f.readFastx(r):
      let name =
        if searchComment: r.name & " " & r.comment
        else: r.name

      var pass = true

      if useName and nameQuery notin name:
        pass = false

      # No capture buffer is passed: captures were never used, and an empty
      # buffer cannot hold groups present in the user pattern.
      if pass and useRegex and not match(name, nameRegex):
        pass = false

      if pass and useOligo:
        let oligos = findPrimerMatches(r.seq, oligoQuery, matchThs, maxMismatches, minMatches)
        # The record is dropped only when both result lists are empty.
        if len(oligos[0]) == 0 and len(oligos[1]) == 0:
          pass = false

      if pass:
        print_seq(r, nil)


16 changes: 2 additions & 14 deletions src/seqfu_utils.nim
Original file line number Diff line number Diff line change
@@ -1,22 +1,10 @@
import klib, readfq

import strutils, os
#[ Versions
BETA: 0.8.{{VERSION}}
- 2.0.0 Moved to 'seqfu2', to keep seqfu for perl utilities
- 0.4.0 Added 'tail'
- 0.3.0 Added 'stats'
- 0.2.1 Added 'head'
- 0.2.0 Improved 'count' with PE support
Initial refactoring
- 0.1.2 Added 'count' stub
- 0.1.1 Added 'derep' to dereplicate
- 0.1.0 Initial release
]#


proc version*(): string =
  ## Program version string (handed to docopt as `version=` by subcommands).
  return "0.8.7"



Expand Down
2 changes: 2 additions & 0 deletions src/sfu.nim
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ include ./fastx_head
include ./fastx_tail
include ./fastx_stats
include ./fastx_sort
include ./fastx_grep
#include ./fastx_fast_derep


Expand All @@ -46,6 +47,7 @@ var progs = {
"srt": fastx_sort,
"sort" : fastx_sort,
"view": fastx_view,
"grep": fastx_grep,
"head": fastx_head,
"tail": fastx_tail,
}.toTable
Expand Down

0 comments on commit f808697

Please sign in to comment.