From f57abfe3262c6df69b6e9224618c21077f8a9c24 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Tue, 19 Nov 2024 15:07:44 -0800 Subject: [PATCH] src/utils/xcounts.cpp: adding a flag to force gzip output even if input is not gzip --- src/utils/xcounts.cpp | 51 ++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/utils/xcounts.cpp b/src/utils/xcounts.cpp index ccadfde..13e7dc3 100644 --- a/src/utils/xcounts.cpp +++ b/src/utils/xcounts.cpp @@ -18,12 +18,12 @@ #include +#include #include #include #include -#include -#include #include +#include // from smithlab_cpp #include "OptionParser.hpp" @@ -40,13 +40,12 @@ using std::endl; using std::runtime_error; using std::string; using std::to_chars; -using std::vector; using std::to_string; +using std::vector; using bamxx::bgzf_file; - -template +template static inline uint32_t fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) { auto buf_end = buf.data() + buf.size(); @@ -59,11 +58,11 @@ fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) { return std::distance(buf.data(), res.ptr); } - int main_xcounts(int argc, const char **argv) { try { bool verbose = false; + bool gzip_output = false; bool require_coverage = false; size_t n_threads = 1; string genome_file; @@ -78,14 +77,17 @@ main_xcounts(int argc, const char **argv) { " (\"-\" for standard input)", 1); opt_parse.add_opt("output", 'o', "output file (default is standard out)", false, outfile); - opt_parse.add_opt("chroms", 'c', "make header from this reference", - false, genome_file); - opt_parse.add_opt("reads", 'r', "ouput only sites with reads", - false, require_coverage); - opt_parse.add_opt("header", 'h', "use this file to generate header", - false, header_file); + opt_parse.add_opt("chroms", 'c', "make header from this reference", false, + genome_file); + opt_parse.add_opt("reads", 'r', "ouput only sites with reads", false, + require_coverage); + opt_parse.add_opt("header", 'h', "use this file to generate header", false, + header_file); opt_parse.add_opt("threads", 't', "threads for compression (use few)", false, n_threads); + opt_parse.add_opt("zip", 'z', + "gzip compress output (automatic if input is gzip)", + false, gzip_output); opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); @@ -112,21 +114,22 @@ main_xcounts(int argc, const char **argv) { vector chrom_names; vector chrom_sizes; if (!genome_file.empty()) { - const int ret = - get_chrom_sizes_for_counts_header(n_threads, genome_file, - chrom_names, chrom_sizes); + const int ret = get_chrom_sizes_for_counts_header( + n_threads, genome_file, chrom_names, chrom_sizes); if (ret) throw dnmt_error{"failed to get chrom sizes from: " + genome_file}; } bamxx::bam_tpool tpool(n_threads); bgzf_file in(filename, "r"); - if (!in) throw dnmt_error{"could not open file: " + filename}; + if (!in) + throw dnmt_error{"could not open file: " + filename}; - const auto outfile_mode = in.is_compressed() ? "w" : "wu"; + const auto outfile_mode = (gzip_output || in.is_compressed()) ? "w" : "wu"; bgzf_file out(outfile, outfile_mode); - if (!out) throw dnmt_error{"error opening output file: " + outfile}; + if (!out) + throw dnmt_error{"error opening output file: " + outfile}; if (n_threads > 1) { if (in.is_bgzf()) @@ -142,7 +145,8 @@ main_xcounts(int argc, const char **argv) { // use the kstring_t type to more directly use the BGZF file kstring_t line{0, 0, nullptr}; const int ret = ks_resize(&line, 1024); - if (ret) throw dnmt_error("failed to acquire buffer"); + if (ret) + throw dnmt_error("failed to acquire buffer"); vector buf(128); @@ -154,7 +158,8 @@ main_xcounts(int argc, const char **argv) { MSite site; while (status_ok && bamxx::getline(in, line)) { if (is_counts_header_line(line.s)) { - if (!genome_file.empty() || !header_file.empty()) continue; + if (!genome_file.empty() || !header_file.empty()) + continue; found_header = true; const string header_line{line.s}; write_counts_header_line(header_line, out); @@ -162,7 +167,8 @@ main_xcounts(int argc, const char **argv) { } status_ok = site.initialize(line.s, line.s + line.l); - if (!status_ok || !found_header) break; + if (!status_ok || !found_header) + break; if (site.chrom != prev_chrom) { if (verbose) @@ -183,8 +189,7 @@ main_xcounts(int argc, const char **argv) { ks_free(&line); if (!status_ok) { - cerr << "failed converting " - << filename << " to " << outfile << endl; + cerr << "failed converting " << filename << " to " << outfile << endl; return EXIT_FAILURE; } if (!found_header) {