From 217b7ea0e0491e73842d45672a9d7c2575984585 Mon Sep 17 00:00:00 2001 From: "Andrea Telatin (QIB)" Date: Wed, 3 Nov 2021 10:13:55 +0000 Subject: [PATCH] release.py automatically changed docs --- docs/releases/v1.8.3.md | 38 +++++++++++++++++++++++++++++++++++ releases/v1.8.3.splashes.json | 1 + 2 files changed, 39 insertions(+) create mode 100644 docs/releases/v1.8.3.md create mode 100644 releases/v1.8.3.splashes.json diff --git a/docs/releases/v1.8.3.md b/docs/releases/v1.8.3.md new file mode 100644 index 0000000..b97c4cf --- /dev/null +++ b/docs/releases/v1.8.3.md @@ -0,0 +1,38 @@ +# SeqFu v1.8.3 + +### Changes +* Markdown documentation improvements +* Splashscreen for *fu-virfilter* fixed +* Argument parser for _fu-cov_ improved +* Now `seqfu --version` and `seqfu version` will print the version number and exit +* Added test for _fu-cov_ +* Added citation in main command and repository + +### Splash screen +```text +SeqFu - Sequence Fastx Utilities +version: 1.8.3 + + · count [cnt] : count FASTA/FASTQ reads, pair-end aware + · deinterleave [dei] : deinterleave FASTQ + · derep [der] : feature-rich dereplication of FASTA/FASTQ files + · interleave [ilv] : interleave FASTQ pair ends + · lanes [mrl] : merge Illumina lanes + · list [lst] : print sequences from a list of names + · metadata [met] : print a table of FASTQ reads (mapping files) + · sort [srt] : sort sequences by size (uniques) + · stats [st] : statistics on sequence lengths + + · cat : concatenate FASTA/FASTQ files + · grep : select sequences with patterns + · head : print first sequences + · rc : reverse complement strings or files + · tab : tabulate reads to TSV (and viceversa) + · tail : view last sequences + · view : view sequences with colored quality and oligo matches + +Add --help after each command to print usage + +``` + + diff --git a/releases/v1.8.3.splashes.json b/releases/v1.8.3.splashes.json new file mode 100644 index 0000000..937c317 --- /dev/null +++ b/releases/v1.8.3.splashes.json @@ 
-0,0 +1 @@ +{"seqfu tail": "Usage: tail [options] [ ...]\n\nOptions:\n -n, --num NUM Print the last NUM sequences [default: 10]\n -k, --skip SKIP Print one sequence every SKIP [default: 0]\n -p, --prefix STRING Rename sequences with prefix + incremental number\n -s, --strip-comments Remove comments\n -b, --basename prepend basename to sequence name\n --fasta Force FASTA output\n --fastq Force FASTQ output\n --sep STRING Sequence name fields separator [default: _]\n -q, --fastq-qual INT FASTQ default quality [default: 33]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "seqfu qual": "Usage: qual [options] [...] \n\nQuickly check the quality of input files returning the detected encoding \nand the profile of quality scores. \nTo read from STDIN, use - as filename.\n\n -m, --max INT Check the first INT reads [default: 5000]\n -l, --maxlen INT Maximum read length [default: 1000]\n -k, --skip INT Print one sequence every INT [default: 1]\n\nQualified position:\n -w, --wnd INT Sliding window size [default: 4]\n -q, --wnd-qual FLOAT Minimum quality in the sliding window [default: 30.0]\n -z, --min-qual FLOAT Stop the sliding windows when quality is below [default: 18.0] \n\nAdditional output:\n --gc Print GC content as extra column\n -p, --profile Quality profile per position (will comment the summary lines)\n -c, --colorbars Print graphical average quality profile\n\nOther options:\n -v, --verbose Verbose output\n -O, --offset INT Quality encoding offset [default: 33]\n --help Show this help\n", "seqfu cat": "Usage: cat [options] [ ...]\n\nConcatenate multiple FASTA or FASTQ files.\n\nOptions:\n -k, --skip SKIP Print one sequence every SKIP [default: 0]\n\nSequence name:\n -p, --prefix STRING Rename sequences with prefix + incremental number\n -z, --strip-name Remove the original sequence name\n -a, --append STRING Append this string to the sequence name [default: ]\n --sep STRING Sequence name fields separator [default: _]\n\n -b, --basename Prepend file 
basename to the sequence name (before prefix)\n --split CHAR Split basename at this char [default: .]\n --part INT After splitting the basename, take this part [default: 1]\n --basename-sep STRING Separate basename from the rest with this [default: _]\n\nSequence comments:\n -s, --strip-comments Remove original sequence comments\n --comment-sep CHAR Comment separator [default: ]\n --add-len Add 'len=LENGTH' to the comments\n --add-initial-len Add 'original_len=LENGTH' to the comments\n --add-gc Add 'gc=%GC' to the comments\n --add-initial-gc Add 'original_gc=%GC' to the comments\n --add-name Add 'original_name=INITIAL_NAME' to the comments\n\nFiltering:\n -m, --min-len INT Discard sequences shorter than INT [default: 1]\n -x, --max-len INT Discard sequences longer than INT, 0 to ignore [default: 0]\n --trim-front INT Trim INT base from the start of the sequence [default: 0]\n --trim-tail INT Trim INT base from the end of the sequence [default: 0]\n --truncate INT Keep only the first INT bases, 0 to ignore [default: 0]\n Negative values to print the last INT bases\n\nOutput:\n --fasta Force FASTA output\n --fastq Force FASTQ output\n --list Output a list of sequence names \n -q, --fastq-qual INT FASTQ default quality [default: 33]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "seqfu rc": "Usage: rc [options] [...] \n\nPrint the reverse complementary of sequences in files or sequences\ngiven as parameters. 
Can read FASTA/FASTQ also from STDIN, but not\nnaked strings.\n\nOptions:\n -s, --seq-name NAME Sequence name if coming as string [default: dna]\n -r, --only-rev Reverse, do not complement\n --strip-comments Remove sequence comments\n -v, --verbose Verbose output\n --help Show this help\n", "seqfu deinterleave": "dei: deinterleave FASTQ files\n\n Usage: dei [options] -o basename \n\n -o --output-basename \"str\" save output to output_R1.fq and output_R2.fq\n -f --for-ext \"R1\" extension for R1 file [default: _R1.fq]\n -r --rev-ext \"R2\" extension for R2 file [default: _R2.fq]\n -c --check enable careful mode (check sequence names and numbers)\n -v --verbose print verbose output\n\n -s --strip-comments skip comments\n -p --prefix \"string\" rename sequences (append a progressive number)\n\nnotes:\n use \"-\" as input filename to read from STDIN\n\nexample:\n\n dei -o newfile file.fq\n", "seqfu merge": "Usage: merge [options] -1 File_R1\n\n Options:\n -1, --R1 FILE First paired-end file\n -2, --R2 FILE Second paired-end file, can be automatically inferred \n -i, --minid FLOAT Minimum identity [default: 0.80]\n -m, --minlen INT Minimum overlap [default: 20]\n --accepted-identity FLOAT Accept fusion when identity is above FLOAT [default: 0.96]\n -v, --verbose Print verbose messages\n -h, --help Show this help\n", "seqfu head": "Usage: head [options] [ ...]\n\nSelect a number of sequences from the beginning of a file, allowing\nto select a fraction of the reads (for example to print 100 reads,\nselecting one every 10).\n\nOptions:\n -n, --num NUM Print the first NUM sequences [default: 10]\n -k, --skip SKIP Print one sequence every SKIP [default: 0]\n -p, --prefix STRING Rename sequences with prefix + incremental number\n -s, --strip-comments Remove comments\n -b, --basename prepend basename to sequence name\n -v, --verbose Verbose output\n --quiet Don't print warnings\n --help Show this help\n\nOutput:\n --fasta Force FASTA output\n --fastq Force FASTQ output\n --sep 
STRING Sequence name fields separator [default: _]\n -q, --fastq-qual INT FASTQ default quality [default: 33]\n", "seqfu list": "Usage: list [options] ...\n\nPrint sequences that are present in a list file, which\ncan contain leading \">\" or \"@\" characters.\nDuplicated entries in the list will be ignored.\n\nOther options:\n -c, --with-comments Include comments in the list file\n -p, --partial-match Allow partial matches (UNSUPPORTED)\n -m, --min-len INT Skip entries smaller than INT [default: 1]\n\n -v, --verbose Verbose output\n -r, --report Print report of found sequences\n --help Show this help\n", "seqfu view": "Usage: view [options] []\n\nView a FASTA/FASTQ file for manual inspection, allowing to search for\nan oligonucleotide.\n\nOptions:\n -o, --oligo1 OLIGO Match oligo, with ambiguous IUPAC chars allowed\n (rev. compl. search is performed), color blue\n -r, --oligo2 OLIGO Second oligo to be scanned for, color red\n -q, --qual-scale STR Quality thresholds, seven values\n separated by colons [default: 3:15:25:28:30:35:40]\n\n --match-ths FLOAT Oligo matching threshold [default: 0.75]\n --min-matches INT Oligo minimum matches\u00a0[default: 5]\n --max-mismatches INT Oligo maximum mismatches\u00a0[default: 2]\n --ascii Encode the quality as ASCII chars (when UNICODE is\n not available)\n -Q, --qual-chars Show quality characters instead of bars\n -n, --nocolor Disable colored output\n --verbose Show extra information\n -h, --help Show this help\n", "seqfu grep": "Usage: grep [options] [ ...]\n\nPrint sequences selected if they match patterns or contain oligonucleotides\n\nOptions:\n -n, --name STRING String required in the sequence name\n -r, --regex PATTERN Pattern to be matched in sequence name\n -c, --comment Also search -n and -r in the comment\n -o, --oligo IUPAC Oligonucleotide required in the sequence,\n using ambiguous bases and reverse complement\n -A, --append-pos Append matching positions to the sequence comment\n --max-mismatches INT Maximum 
mismatches allowed [default: 0]\n --min-matches INT Minimum number of matches [default: oligo-length]\n -v, --verbose Verbose output\n --help Show this help\n", "seqfu lanes": "Usage: lanes [options] -o \n\nOptions:\n -o, --outdir DIR Output directory\n -e, --extension STR File extension [default: .fastq]\n -s, --file-separator STR Field separator in filenames [default: _]\n --comment-separator STR String separating sequence name and its comment [default: TAB]\n -v, --verbose Verbose output \n -h, --help Show this help\n", "seqfu tabulate": "Usage: tabulate [options] []\n\nConvert FASTQ to TSV and viceversa. Single end is a 4 columns table (name, comment, seq, qual),\npaired end have 4 columns for the R1 and 4 columns for the R2. \nPaired end reads need to be supplied as interleaved.\n \n\nOptions:\n -i, --interleaved Input is interleaved (paired-end)\n -d, --detabulate Convert TSV to FASTQ (if reading from file is autodetected) \n -c, --comment-sep CHAR Separator between name and comment (default: tab)\n -s, --field-sep CHAR Field separator (default: tab)\n -v, --verbose Verbose output\n -h, --help Show this help\n", "seqfu stats": "Usage: stats [options] [ ...]\n\nOptions:\n -a, --abs-path Print absolute paths\n -b, --basename Print only filenames\n -n, --nice Print nice terminal table\n --csv Separate with commas (default: tabs)\n --multiqc FILE Saves a MultiQC report to FILE (suggested: name_mqc.txt)\n -v, --verbose Verbose output\n -h, --help Show this help\n", "seqfu derep": "Usage: derep [options] [ ...]\n\nOptions:\n -k, --keep-name Do not rename sequence (see -p), but use the first sequence name\n -i, --ignore-size Do not count 'size=INT;' annotations (they will be stripped in any case)\n -m, --min-size=MIN_SIZE Print clusters with size equal or bigger than INT sequences [default: 0]\n -p, --prefix=PREFIX Sequence name prefix [default: seq]\n -5, --md5 Use MD5 as sequence name (overrides other parameters)\n -j, --json=JSON_FILE Save dereplication metadata 
to JSON file\n -s, --separator=SEPARATOR Sequence name separator [default: .]\n -w, --line-width=LINE_WIDTH FASTA line width (0: unlimited) [default: 0]\n -l, --min-length=MIN_LENGTH Discard sequences shorter than MIN_LEN [default: 0]\n -x, --max-length=MAX_LENGTH Discard sequences longer than MAX_LEN [default: 0]\n -c, --size-as-comment Print cluster size as comment, not in sequence name\n --add-len Add length to sequence\n -v, --verbose Print verbose messages\n -h, --help Show this help\n", "seqfu metadata": "Usage: metadata [options] [ ...]\n\nPrepare mapping files from directory containing FASTQ files\n\nOptions:\n -1, --for-tag STR String found in filename of forward reads [default: _R1]\n -2, --rev-tag STR String found in filename of reverse reads [default: _R2]\n -s, --split STR Separator used in filename to identify the sample ID [default: _]\n -f, --format TYPE Output format: dadaist, manifest, qiime1, qiime2, irida [default: manifest]\n -P, --project INT Project ID (only for irida)\n --pos INT... Which part of the filename is the Sample ID [default: 1]\n --pe Enforce paired-end reads (not supported)\n -p, --add-path Add the reads absolute path as column \n -c, --counts Add the number of reads as a property column\n -t, --threads INT Number of simultaneously opened files [default: 2]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "seqfu interleave": "ilv: interleave FASTQ files\n\n Usage: ilv [options] -1 [-2 ]\n\n -f --for-tag string identifying forward files [default: auto]\n -r --rev-tag string identifying reverse files [default: auto]\n -o --output save file to instead of STDOUT\n -c --check enable careful mode (check sequence names and numbers)\n -v --verbose print verbose output\n\n -s --strip-comments skip comments\n -p --prefix \"string\" rename sequences (append a progressive number)\n\nguessing second file:\n by default is scanned for _R1. and substitute with _R2.\n if this fails, the patterns _1. and _2. 
are tested.\n\nexample:\n\n ilv -1 file_R1.fq > interleaved.fq\n", "seqfu count": "Usage: count [options] [ ...]\n\nCount sequences in paired-end aware format\n\nOptions:\n -a, --abs-path Print absolute paths\n -b, --basename Print only filenames\n -u, --unpair Print separate records for paired end files\n -f, --for-tag R1 Forward tag [default: auto]\n -r, --rev-tag R2 Reverse tag [default: auto]\n -t, --threads INT Working threads [default: 4]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "seqfu sort": "Usage: sort [options] [ ...]\n\n Sort sequences by size printing only unique sequences\n\nOptions:\n -p, --prefix STRING Sequence prefix \n -s, --strip-comments Remove sequence comments\n --asc Ascending order\n -v, --verbose Verbose output\n -h, --help Show this help\n", "fu-index": "Fastx utility\n\n A program to print the Illumina INDEX of a set of FASTQ files\n\n Usage: \n fu-index [options] ...\n\n Options:\n \n -m, --max-reads INT Evaluate INT number of reads [default: 1000]\n -r, --min-ratio FLOAT Minimum ratio of matches of the top index [default: 0.85]\n --verbose Print verbose log\n --help Show help\n", "fu-cov": "fu-cov\n\n Extract contigs using coverage data from the assembler\n\n Usage: \n fu-cov [options] [...]\n\n Options:\n -c, --min-cov FLOAT Minimum coverage [default: 0.0]\n -x, --max-cov FLOAT Maximum coverage [default: 0.0]\n -l, --min-len INT Minimum contig length [default: 0]\n -y, --max-len INT Maximum contig length [default: 0]\n -t, --top INT Print the first TOP sequences when using --sort [default: 0] \n -s, --sort Store contigs in memory and sort them by descending coverage\n --verbose Print verbose log\n --help Show help\n", "fu-virfilter": "Usage: fu-virfilter [options] \n\n Files:\n VirFinder output file (csv format)\n FASTA file to filter\n\n Options:\n -p, --max-pvalue FLOAT Maximum p-value to keep [default: 0.05]\n -s, --min-score FLOAT Minimum score [default: 0.90]\n --min-len INT Minimum length [default: 100]\n 
--max-len INT Maximum length [default: 1000000]\n\n Other options:\n --sep CHAR Separator [default: ,]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "fu-shred": "Usage: fu-shred [options] [...]\n\n Systematically produce a \"shotgun\" of input sequences. Can read from standard input.\n\n Options:\n -l, --length INT Segment length [default: 100]\n -s, --step INT Distance from one segment start to the following [default: 10] \n -q, --quality INT Quality (constant) for the segment, if -1 is \n provided will be printed in FASTA [default: 40]\n -r, --add-rc Print every other read in reverse complement\n -b, --basename Prepend the file basename to the read name\n --split-basename STRING Split the file basename at this character [default: .]\n --prefix-separator STRING Join the basename with the rest of the read name with this [default: _]\n\n -v, --verbose Verbose output\n -h, --help Show this help\n", "fu-orf": "fu-orf\n\n Extract ORFs from Paired-End reads.\n\n Usage: \n fu-orf [options] -1 File_R1.fq\n\n Options:\n -1, --R1 FILE First paired end file\n -2, --R2 FILE Second paired end file\n -m, --min-size INT Minimum ORF size (aa) [default: 25]\n -p, --prefix STRING Rename reads using this prefix\n --min-overlap INT Minimum PE overlap [default: 12]\n --max-overlap INT Maximum PE overlap [default: 200]\n --min-identity FLOAT Minimum sequence identity in overlap [default: 0.80]\n -j, --join Attempt Paired-End joining\n --pool-size INT Size of the sequences array to be processed\n by each working thread [default: 250]\n --verbose Print verbose log\n --help Show help\n", "fu-tabcheck": "fu-tabcheck\n\n A program to inspect TSV and CSV files, that must contain more than 1 column.\n Double quotes are considered field delimiters, if present.\n Gzipped files are supported natively.\n\n Usage: \n fu-tabcheck [options] ...\n\n Options:\n -s --separator CHAR Character separating the values, 'tab' for tab and 'auto'\n to try tab or commas [default: auto]\n -c 
--comment CHAR Comment/Header char [default: #]\n --verbose Enable verbose mode\n", "fu-sw": "Usage: fu-sw [options] -q QUERY -t TARGET\n\n Options:\n -q --query File with the sequence(s) to align against target\n -t --target File with the target sequence(s)\n -i --id ID Align only against the sequence named `ID` in the target file\n -s --showaln Show graphical alignment\n \n Smith-Waterman options:\n --score-match INT Score for a match [default: 10]\n --score-mismatch INT Score for a mismatch [default: -8]\n --score-gap INT Score for a gap [default: -10]\n --min-score INT Minimum alignment score [default: 80]\n --pct-id FLOAT Minimum percentage of identity [default: 85]\n \n Other options:\n --pool-size INT Number of sequences/pairs to process per thread [default: 20]\n -v --verbose Verbose output\n -h --help Show this help\n", "fu-nanotags": "Usage: fu-nanotags [options] -q QUERY [...]\n\n Options:\n -q, --query TAGSEQ Sequence string OR file with the sequence(s) to align against reads\n -s, --showaln Show graphical alignment\n -c, --cut INT Cut input reads at INT position [default: 300]\n -x, --disable-rev-comp Do not scan reverse complemented reads\n -r, --reverse-reads Reverse complement reads that have the tags at the end (3')\n \n Alignment options:\n -i, --pct-id FLOAT Percentage of identity in the aligned region [default: 80.0]\n -m, --min-score INT Minimum alignment score (0 for auto) [default: 0]\n \n Smith-Waterman parameters:\n -M, --weight-match INT Match [default: 5]\n -X, --weight-mismatch INT Mismatch penalty [default: -3]\n -G, --weight-gap INT Gap penalty [default: -5]\n\n Other options:\n --pool-size INT Number of sequences to process per thread, not implemented [default: 25]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "fu-16Sregion": "Usage: fu-16Sregion [options] []\n\n Options:\n -r --reference FILE FASTA file with a reference sequence, E. coli 16S by default\n -j --regions FILE Regions names in JSON format, E. 
coli variable regions by default\n -m --max-reads INT Parse up to INT reads then quit [default: 400]\n -s --min-score INT Minimum alignment score (approx. %id * readlen * matchScore) [default: 1000]\n -f --min-fraction FLOAT Minimum fraction of reads classified to report a region as detected [default: 0.25]\n \n Smith-Waterman:\n --score-match INT Score for a match [default: 10]\n --score-mismatch INT Score for a mismatch [default: -5]\n --score-gap INT Score for a gap [default: -10]\n \n Other options:\n --pool-size INT Number of sequences/pairs to process per thread [default: 1]\n --max-threads INT Maximum number of working threads [default: 128]\n -v --verbose Verbose output\n --debug Enable diagnostics\n -h --help Show this help\n", "fu-multirelabel": "SeqFu MultiRelabel\n\n A program to rename sequences from multiple files (adding the filename,\n and/or numerical postfix). Will fail if multiple sequences receive the same name.\n\n Usage: \n fu-multirelabel [options] FILE...\n\n Options:\n -b, --basename Prepend file basename to sequence\n -r, --rename NAME Replace original name with NAME\n -n, --numeric-postfix Add progressive number (reset at each new basename)\n -t, --total-postfix Add progressive number (without resetting at each new input file)\n -d, --split-basename CHAR Remove the final part of basename after CHAR [default: .]\n -s, --separator STRING Separator between prefix, name, suffix [default: _]\n --no-comments Strip out comments\n --comment-separator CHAR Separate comment from name with CHAR [default: TAB]\n", "fu-homocomp": "Usage: fu-homocompress [options] [...]\n \n Remove all the homopolymers from the input sequences.\n\n Options:\n --pool-size INT Number of sequences to process per thread [default: 50]\n --max-threads INT Maximum number of threads to use [default: 24]\n -v, --verbose Verbose output\n -h, --help Show this help\n", "fu-primers": "Usage: fu-primers [options] -1 [-2 ]\n\n This program currently only supports paired-end Illumina 
reads.\n\n Options:\n -1 --first-pair First sequence in pair\n -2 --second-pair Second sequence in pair (can be guessed)\n -f --primer-for FOR Sequence of the forward primer [default: CCTACGGGNGGCWGCAG]\n -r --primer-rev REV Sequence of the reverse primer [default: GGACTACHVGGGTATCTAATCC]\n -m --min-len INT Minimum sequence length after trimming [default: 50]\n --primer-thrs FLOAT Minimum amount of matches over total length [default: 0.8]\n --primer-mismatches INT Maximum number of mismatches allowed [default: 2]\n --primer-min-matches INT Minimum number of matches required [default: 8]\n --primer-pos-margin INT Number of bases from the extremity of the sequence allowed [default: 2]\n --pattern-R1 Tag in first pairs filenames [default: auto]\n --pattern-R2 Tag in second pairs filenames [default: auto]\n -v --verbose Verbose output\n -h --help Show this help\n"} \ No newline at end of file