Skip to content

Commit

Permalink
minorfix
Browse files Browse the repository at this point in the history
  • Loading branch information
telatin committed Jun 5, 2022
1 parent df40240 commit f289eaa
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 29 deletions.
16 changes: 14 additions & 2 deletions src/fastx_stats.nim
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,19 @@ Sample col1 col2 col3 col4 col5 col6 col7 col8 col9
let
outputTable = newUnicodeTable()
headerFields = @["File", "#Seq", "Total bp","Avg", "N50", "N75", "N90", "auN", "Min", "Max"]

var
opt : statsOptions = (
absolute: bool(args["--abs-path"]),
basename: bool(args["--basename"]),
precision: 2,
thousands: false,
header: true,
gc: false,
scaffolds: false,
delim: sep,
fields: @[]
)

if nice:
outputTable.separateRows = false
outputTable.setHeaders(headerFields)
Expand All @@ -102,7 +114,7 @@ Sample col1 col2 col3 col4 col5 col6 col7 col8 col9
continue

var
stats = getFastxStats(filename)
stats = getFastxStats(filename, opt)

var rendername = if printBasename: $getBasename(filename)
else: filename
Expand Down
15 changes: 1 addition & 14 deletions src/fastx_stats2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,6 @@ import ./seqfu_utils
import ./stats_utils
import algorithm

type
statsOptions = tuple[
absolute: bool,
basename: bool,
precision: int,
thousands: bool,
header: bool,
gc: bool,
scaffolds: bool,
delim: string,
fields: seq[string]
]


proc toSequence(s: FastxStats, o: statsOptions): seq[string] =
var
Expand Down Expand Up @@ -210,7 +197,7 @@ Sample col1 col2 col3 col4 col5 col6 col7 col8 col9 col10
continue

var
stats = getFastxStats(filename)
stats = getFastxStats(filename, opt)

if printBasename:
stats.filename = getBasename(stats.filename)
Expand Down
23 changes: 19 additions & 4 deletions src/stats_utils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ import seqfu_utils
type
FastxStats* = tuple[filename: string, count, sum, min, max, n25, n50, n75, n90: int, gc, auN, avg: float]

# Options record received by `getFastxStats` as its second parameter (`o`).
# Exported (`*`) so that other modules can construct a value and pass it in.
# Fields must be supplied in this order when building the tuple by name.
type
statsOptions* = tuple[
# Set from the `--abs-path` command-line flag by the stats command.
# NOTE(review): presumably requests absolute file paths in the report;
# the consumer is not visible here, confirm.
absolute: bool,
# Set from the `--basename` command-line flag by the stats command.
# NOTE(review): presumably requests base file names in the report; confirm.
basename: bool,
# NOTE(review): presumably the number of decimal digits for printed
# floats; confirm against the formatting code.
precision: int,
# NOTE(review): presumably toggles a thousands separator in printed
# numbers; confirm.
thousands: bool,
# NOTE(review): presumably toggles printing of the header row; confirm.
header: bool,
# When true, `getFastxStats` runs `count_all` on every record's sequence and
# accumulates its `gc` and `tot` counts; when false that work is skipped.
gc: bool,
# NOTE(review): meaning not visible in this module; confirm with the caller.
scaffolds: bool,
# Field separator string supplied by the caller.
# NOTE(review): presumably the output column delimiter; confirm.
delim: string,
# NOTE(review): presumably the list of columns to report; confirm.
fields: seq[string]
]

proc toTable*(s: FastxStats): Table[string, string] =
result["Filename"] = s.filename
result["Total"] = $s.sum
Expand All @@ -22,7 +35,7 @@ proc toTable*(s: FastxStats): Table[string, string] =
result["AuN"] = $s.auN
result["gc"] = $s.gc

proc getFastxStats*(filename: string): FastxStats {.discardable.} =
proc getFastxStats*(filename: string, o: statsOptions): FastxStats {.discardable.} =
result.filename = filename
var
totalBases = 0
Expand All @@ -37,9 +50,11 @@ proc getFastxStats*(filename: string): FastxStats {.discardable.} =
try:
for r in readfq(filename):
var ctgLen = len(r.sequence)
let nucleotides = count_all(r.sequence)
gc += nucleotides.gc
realLen += nucleotides.tot
if o.gc:
let nucleotides = count_all(r.sequence)
gc += nucleotides.gc
realLen += nucleotides.tot

if not (ctgLen in ctgSizes):
ctgSizes[ctgLen] = 1
else:
Expand Down
18 changes: 9 additions & 9 deletions test/test-stats.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

# Single file
TMP=$(mktemp)
STATS=$("$BINDIR"/seqfu stats --basename $iAmpli > $TMP)
STATS=$("$BINDIR"/seqfu stats --basename "$iAmpli" > "$TMP")

# Pull the values checked below out of the report captured in "$TMP".
# `wc -l` may pad its count with blanks on some platforms, so the blanks are
# stripped with `tr`. The previous `grep -o '\d\+'` relied on `\d`, which is not
# a POSIX basic-regex class: GNU grep matches a literal "d" and returns an
# empty string, which made the line-count check fail on Linux.
WC=$(wc -l < "$TMP" | tr -d '[:space:]')
# Last line of the report, tab-separated (cut's default delimiter):
# column 2 = number of sequences, column 3 = total bases, column 5 = N50.
SEQS=$(tail -n 1 "$TMP" | cut -f 2)
TOT=$(tail -n 1 "$TMP" | cut -f 3)
N50=$(tail -n 1 "$TMP" | cut -f 5)

MSG="Checking normal output expecting 2 lines: <$WC>"
MSG="Checking normal output expecting 2 lines: <"$WC">"
if [[ $WC == 2 ]]; then
echo -e "$OK: $MSG"
PASS=$((PASS+1))
Expand All @@ -20,7 +20,7 @@ fi


MSG="Checking normal output expecting total seqs 78730: <$SEQS>"
if [[ $SEQS == 78730 ]]; then
if [[ "$SEQS" == 78730 ]]; then
echo -e "$OK: $MSG"
PASS=$((PASS+1))
else
Expand All @@ -29,7 +29,7 @@ else
fi

MSG="Checking normal output expecting total bases 24299931: <$TOT>"
if [[ $TOT == 24299931 ]]; then
if [[ "$TOT" == 24299931 ]]; then
echo -e "$OK: $MSG"
PASS=$((PASS+1))
else
Expand Down Expand Up @@ -60,7 +60,7 @@ fi
# Nice output
STATS=$("$BINDIR"/seqfu stats --basename --nice $iAmpli > $TMP)
# Count the non-empty lines of the report in "$TMP". Blanks that `wc -l` may
# emit around the number are removed with `tr`; the previous `grep -o '\d\+'`
# used `\d`, which GNU grep does not treat as a digit class in basic regexes.
WC=$(grep . "$TMP" | wc -l | tr -d '[:space:]')
if [[ $WC == 5 ]]; then
if [[ "$WC" == 5 ]]; then
echo -e "$OK: Checking nice output expecting 5 lines: <$WC>"
PASS=$((PASS+1))
else
Expand All @@ -70,10 +70,10 @@ fi

# Json
TMP2=$(mktemp)
STATS=$("$BINDIR"/seqfu stats --basename --json --multiqc $TMP2 $iAmpli > $TMP)
STATS=$("$BINDIR"/seqfu stats --basename --json --multiqc "$TMP2" "$iAmpli" > "$TMP")
# Count the non-empty lines of the standard output ("$TMP") and of the MultiQC
# file ("$TMP2"). Blanks that `wc -l` may emit around the number are removed
# with `tr`; the previous `grep -o '\d\+'` used `\d`, which GNU grep does not
# treat as a digit class in basic regexes.
WC=$(grep . "$TMP" | wc -l | tr -d '[:space:]')
WC2=$(grep . "$TMP2" | wc -l | tr -d '[:space:]')
if [[ $WC2 == 39 ]]; then
if [[ "$WC2" == 39 ]]; then
echo -e "$OK: Checking MultiQC output expecting 39 lines: <$WC2>"
PASS=$((PASS+1))
else
Expand All @@ -91,9 +91,9 @@ fi
# Multi file

# Default sort
"$BINDIR"/seqfu stats --basename $iAmpli $iSort $iMini > $TMP
"$BINDIR"/seqfu stats --basename "$iAmpli" "$iSort" "$iMini" > "$TMP"
# Sort by N50 descending
"$BINDIR"/seqfu stats --basename --sort n50 --reverse $iAmpli $iSort $iMini > $TMP2
"$BINDIR"/seqfu stats --basename --sort n50 --reverse "$iAmpli" "$iSort" "$iMini" > "$TMP2"

FILT=$(cat $TMP | head -n 2 | tail -n 1 | cut -f 1)
MSG="Checking default starting by 'filt': <$FILT>"
Expand Down

0 comments on commit f289eaa

Please sign in to comment.