From 6fb1d2c6ebd000379a815ab8b20fc3955d69ffaf Mon Sep 17 00:00:00 2001 From: Andrea Telatin Date: Thu, 4 Mar 2021 18:16:47 +0000 Subject: [PATCH] v12 --- bin/seqfu | Bin 1373352 -> 1373352 bytes seqfu.nimble | 2 +- src/dadaist2_mergeseqs.nim | 47 ++++++++++++++++++++++++++++++------- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/bin/seqfu b/bin/seqfu index b30f5dc3a3e07ca09fd36f831f985c40c220ed19..c1b0034a269fa6d3f3088746050bd006354cd6ee 100755 GIT binary patch delta 123 zcmZ4SGI+(y;0=42L;`#6v=>?4lDo*fQiP@WU{uWRwbPp~FtuM`VgzCk%?!jWK+Fon zY(UHo#2i4(3B+7L%nigmK+Frod_c?(!~)wdFbSsKWi)CxdmsqJLO?9M-Ryx#kq7{2 CgD{)` delta 123 zcmZ4SGI+(y;0=42L^jU;v$9YAn5g+gv!(@s`J2Q~t$NvffvNoh6C)6VXl5X00b*7l zW&>h&Am#vKP9Ww2Vs0Sj0b*Vt<^y7WAQsqufk`m+E~8<)*#ki!76M}7?Pd=|ibMe2 ChcUST diff --git a/seqfu.nimble b/seqfu.nimble index 040aea6..df98af6 100644 --- a/seqfu.nimble +++ b/seqfu.nimble @@ -1,5 +1,5 @@ # Package -version = "0.8.11" +version = "0.8.12" author = "Andrea Telatin" description = "SeqFU command-line tools" license = "MIT" diff --git a/src/dadaist2_mergeseqs.nim b/src/dadaist2_mergeseqs.nim index 95dbd11..817ebb9 100644 --- a/src/dadaist2_mergeseqs.nim +++ b/src/dadaist2_mergeseqs.nim @@ -13,14 +13,42 @@ proc handler() {.noconv.} = raise newException(EKeyboardInterrupt, "Keyboard Interrupt") setControlCHook(handler) -proc combine(x, y: string): string = +proc compare(x, y: string, maxMismatches: int): bool = + if len(x) != len(y): + return false + var + mismatches = 0 + + if x == y: + return true + for i in 0 .. x.high: - let - xSlice = x[i .. y.high] - ySlice = y[0 .. min(xSlice.high, y.high)] - if xSlice == ySlice: - return x[0 ..< i] & y - return "" + if x[i] != y[i]: + mismatches += 1 + if mismatches > maxMismatches: + return false + return true + + +proc combine(x, y: string, maxMismatches: int): string = + if maxMismatches == 0: + for i in 0 .. x.high: + let + xSlice = x[i .. y.high] + ySlice = y[0 .. min(xSlice.high, y.high)] + if xSlice == ySlice: + return x[0 ..< i] & y + return "" + else: + for i in 0 .. x.high: + let + xSlice = x[i .. y.high] + ySlice = y[0 .. min(xSlice.high, y.high)] + if compare(xSlice, ySlice, maxMismatches): + echo ">", x[0 ..< i] & y + echo "<", x + return x[0 ..< i] & y + return "" proc main(): int = let args = docopt(""" @@ -35,6 +63,7 @@ proc main(): int = -p, --pair-spacer STRING Pairs separator [default: NNNNNNNNNN] -s, --strip STRING Remove this string from sample names -n, --seq-name STRING Sequence string name [default: MD5] + -m, --max-mismatches INT Maximum allowed mismatches [default: 0] --id STRING Features column name [default: #OTU ID] --verbose Print verbose output @@ -79,6 +108,8 @@ proc main(): int = # Output: header echo (p.headers).join("\t") + let + maxMismatches = parseInt($args["--max-mismatches"]) var counter = 0 joined = 0 @@ -93,7 +124,7 @@ proc main(): int = fragments = repSeq.split($args["--pair-spacer"]) if fragments.high == 1: - merge = combine(fragments[0], fragments[1]) + merge = combine(fragments[0], fragments[1], maxMismatches) split += 1 elif args["--verbose"]: stderr.writeLine("Sequence not splittable at ", counter)