Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding option to force gzip output for xcounts #252

Merged
merged 1 commit into from
Nov 19, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 28 additions & 23 deletions src/utils/xcounts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@

#include <bamxx.hpp>

#include <charconv>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <charconv>
#include <system_error>
#include <vector>

// from smithlab_cpp
#include "OptionParser.hpp"
Expand All @@ -40,13 +40,12 @@ using std::endl;
using std::runtime_error;
using std::string;
using std::to_chars;
using std::vector;
using std::to_string;
using std::vector;

using bamxx::bgzf_file;


template<typename T>
template <typename T>
static inline uint32_t
fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
auto buf_end = buf.data() + buf.size();
Expand All @@ -59,11 +58,11 @@ fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
return std::distance(buf.data(), res.ptr);
}


int
main_xcounts(int argc, const char **argv) {
try {
bool verbose = false;
bool gzip_output = false;
bool require_coverage = false;
size_t n_threads = 1;
string genome_file;
Expand All @@ -78,14 +77,17 @@ main_xcounts(int argc, const char **argv) {
"<counts-file> (\"-\" for standard input)", 1);
opt_parse.add_opt("output", 'o', "output file (default is standard out)",
false, outfile);
opt_parse.add_opt("chroms", 'c', "make header from this reference",
false, genome_file);
opt_parse.add_opt("reads", 'r', "ouput only sites with reads",
false, require_coverage);
opt_parse.add_opt("header", 'h', "use this file to generate header",
false, header_file);
opt_parse.add_opt("chroms", 'c', "make header from this reference", false,
genome_file);
opt_parse.add_opt("reads", 'r', "ouput only sites with reads", false,
require_coverage);
opt_parse.add_opt("header", 'h', "use this file to generate header", false,
header_file);
opt_parse.add_opt("threads", 't', "threads for compression (use few)",
false, n_threads);
opt_parse.add_opt("zip", 'z',
"gzip compress output (automatic if input is gzip)",
false, gzip_output);
opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
std::vector<string> leftover_args;
opt_parse.parse(argc, argv, leftover_args);
Expand All @@ -112,21 +114,22 @@ main_xcounts(int argc, const char **argv) {
vector<string> chrom_names;
vector<uint64_t> chrom_sizes;
if (!genome_file.empty()) {
const int ret =
get_chrom_sizes_for_counts_header(n_threads, genome_file,
chrom_names, chrom_sizes);
const int ret = get_chrom_sizes_for_counts_header(
n_threads, genome_file, chrom_names, chrom_sizes);
if (ret)
throw dnmt_error{"failed to get chrom sizes from: " + genome_file};
}

bamxx::bam_tpool tpool(n_threads);
bgzf_file in(filename, "r");
if (!in) throw dnmt_error{"could not open file: " + filename};
if (!in)
throw dnmt_error{"could not open file: " + filename};

const auto outfile_mode = in.is_compressed() ? "w" : "wu";
const auto outfile_mode = (gzip_output || in.is_compressed()) ? "w" : "wu";

bgzf_file out(outfile, outfile_mode);
if (!out) throw dnmt_error{"error opening output file: " + outfile};
if (!out)
throw dnmt_error{"error opening output file: " + outfile};

if (n_threads > 1) {
if (in.is_bgzf())
Expand All @@ -142,7 +145,8 @@ main_xcounts(int argc, const char **argv) {
// use the kstring_t type to more directly use the BGZF file
kstring_t line{0, 0, nullptr};
const int ret = ks_resize(&line, 1024);
if (ret) throw dnmt_error("failed to acquire buffer");
if (ret)
throw dnmt_error("failed to acquire buffer");

vector<char> buf(128);

Expand All @@ -154,15 +158,17 @@ main_xcounts(int argc, const char **argv) {
MSite site;
while (status_ok && bamxx::getline(in, line)) {
if (is_counts_header_line(line.s)) {
if (!genome_file.empty() || !header_file.empty()) continue;
if (!genome_file.empty() || !header_file.empty())
continue;
found_header = true;
const string header_line{line.s};
write_counts_header_line(header_line, out);
continue;
}

status_ok = site.initialize(line.s, line.s + line.l);
if (!status_ok || !found_header) break;
if (!status_ok || !found_header)
break;

if (site.chrom != prev_chrom) {
if (verbose)
Expand All @@ -183,8 +189,7 @@ main_xcounts(int argc, const char **argv) {
ks_free(&line);

if (!status_ok) {
cerr << "failed converting "
<< filename << " to " << outfile << endl;
cerr << "failed converting " << filename << " to " << outfile << endl;
return EXIT_FAILURE;
}
if (!found_header) {
Expand Down
Loading