diff --git a/src/maf_tools.cpp b/src/maf_tools.cpp index a55d172..1caf8d2 100644 --- a/src/maf_tools.cpp +++ b/src/maf_tools.cpp @@ -124,6 +124,7 @@ PermVec parseGff(const std::string& filename, int minBlockLen) std::cerr << "\tReading GFF file" << std::endl; std::unordered_map permBySeqId; + std::unordered_map sequenceLengths; //std::unordered_map> newBlocks; std::ifstream fin(filename); @@ -134,6 +135,16 @@ PermVec parseGff(const std::string& filename, int minBlockLen) { std::getline(fin, line); if (line.empty()) continue; + + //reading sequence lengths + if (line.substr(0, 17) == "##sequence-region") + { + auto tokens = split(line, " "); + if (tokens.size() != 4) continue; + int length = atoi(tokens[3].c_str()) - atoi(tokens[2].c_str()) + 1; + sequenceLengths[tokens[1]] = length; + } + if (line[0] == '#') continue; auto tokens = split(line, "\t"); @@ -160,6 +171,14 @@ PermVec parseGff(const std::string& filename, int minBlockLen) permBySeqId[seqName].seqName = seqName; } + for (auto permIt : permBySeqId) + { + if (sequenceLengths.count(permIt.first)) + { + permIt.second.nucLength = sequenceLengths[permIt.first]; + } + } + std::vector permutations; int seqId = 1; auto cmp = [](const Block& a, const Block& b) {return a.start < b.start;};