Skip to content

Commit

Permalink
15.0.67
Browse files Browse the repository at this point in the history
  • Loading branch information
Divon Lan committed Sep 23, 2024
1 parent 8188654 commit 30a6d33
Show file tree
Hide file tree
Showing 66 changed files with 728 additions and 529 deletions.
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,5 @@ ABOVE STATED REMEDY FAILS OF ITS ESSENTIAL PURPOSE.

END OF TERMS AND CONDITIONS

Genozip license version: 15.0.66
Genozip license version: 15.0.67

3 changes: 3 additions & 0 deletions RELEASE_NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ Note on versioning:
- Minor version changes with bug fixes and minor feature updates
- Some minor versions are skipped due to failed deployment pipelines

15.0.67 23/9/2024
- Improvements in Deep.

15.0.66 15/9/2024
- BAM: better compression of PacBio and Nanopore files generated with minimap2, pbmm2, winnowmap
- BAM: further small reduction in RAM consumption when compressing and uncompressing SAM/BAM/CRAM files with many Supplementary and Secondary alignments
Expand Down
3 changes: 2 additions & 1 deletion genozip.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@
"string": "c",
"string_view": "c",
"bzlib.h": "c",
"memory": "c"
"memory": "c",
"vector": "c"
}
}
}
2 changes: 1 addition & 1 deletion installers/LICENSE.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@
10. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides Genozip on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Genozip and assume any risks associated with Your exercise of permissions under this License.<br><br>
11. LIMITATION OF LIABILITY. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, STRICT LIABILITY OR OTHER LEGAL OR EQUITABLE THEORY, SHALL LICENSOR OR DEVELOPER BE LIABLE FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER ARISING AS A RESULT OF THIS LICENSE OR OUT OF THE USE OR INABILITY TO USE GENOZIP (INCLUDING BUT NOT LIMITED TO DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, FILE CORRUPTION, DATA LOSS, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF LICENSOR OR DEVELOPER HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL LICENSOR'S OR DEVELOPER'S TOTAL LIABILITY TO LICENSEE FOR ALL DAMAGES (OTHER THAN AS MAY BE REQUIRED BY APPLICABLE LAW IN CASES INVOLVING PERSONAL INJURY) EXCEED THE AMOUNT OF $500 USD. THE FOREGOING LIMITATIONS WILL APPLY EVEN IF THE ABOVE STATED REMEDY FAILS OF ITS ESSENTIAL PURPOSE.<br><br>
END OF TERMS AND CONDITIONS<br><br>
Genozip license version: 15.0.66<br><br>
Genozip license version: 15.0.67<br><br>
Binary file modified installers/genozip-installer.exe
Binary file not shown.
Binary file modified installers/genozip-linux-x86_64.tar
Binary file not shown.
Binary file modified installers/genozip-osx-arm.tar
Binary file not shown.
Binary file modified installers/genozip-osx-x86.tar
Binary file not shown.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ LATEST_SRC = ../../genozip-latest/src
SRC_DIRS = secure zlib bzlib lzma bsc libdeflate_1.7 libdeflate_1.19 libdeflate_1.19/x86 libdeflate_1.19/arm \
htscodecs igzip igzip/aarch64 igzip/x86_64 igzip/noarch

MY_SRCS = genozip.c genols.c context.c container.c strings.c stats.c arch.c tip.c seg_id.c zip_dyn_int.c \
MY_SRCS = genozip.c genols.c context.c container.c strings.c crc64.c stats.c arch.c tip.c seg_id.c zip_dyn_int.c\
data_types.c bits.c progress.c writer.c zriter.c tar.c chrom.c qname.c tokenizer.c mutex.c threads.c \
zip.c piz.c reconstruct.c recon_history.c recon_peek.c seg.c zfile.c aligner.c flags.c specials.c \
reference.c contigs.c ref_lock.c refhash.c ref_make.c ref_contigs.c ref_iupacs.c ref_cache.c digest.c \
Expand Down
4 changes: 2 additions & 2 deletions src/bam_seg.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ uint32_t bam_split_aux (VBlockSAMP vb, rom alignment, rom aux, rom after_aux, ro
void bam_seg_BIN (VBlockSAMP vb, ZipDataLineSAMP dl, uint16_t bin /* used only in bam */, bool is_bam)
{
PosType32 this_pos = dl->POS;
PosType32 last_pos = dl->FLAG.unmapped ? this_pos : (this_pos + vb->ref_consumed - 1);
PosType32 last_pos = dl->FLAG.unmapped ? this_pos : (this_pos + vb->ref_consumed - 1); // note: it is possible to have RNAME/POS set and FLAG.unmapped. The SAM spec calls this "placed unmapped";
uint16_t reg2bin = bam_reg2bin (this_pos, last_pos); // zero-based, half-closed half-open [start,end)

if (!is_bam || (last_pos <= MAX_POS_SAM && reg2bin == bin))
Expand Down Expand Up @@ -549,7 +549,7 @@ rom bam_seg_txt_line (VBlockP vb_, rom alignment /* BAM terminology for one line

// finally we can segment the textual CIGAR now (including if n_cigar_op=0)
sam_seg_CIGAR (vb, dl, vb->textual_cigar.len32, STRb(vb->textual_seq), qual, l_seq,
((uint32_t)n_cigar_op * sizeof (uint32_t) /* cigar */ + sizeof (uint16_t) /* n_cigar_op */));
((uint32_t)n_cigar_op * sizeof (uint32_t) /* cigar */ + sizeof (uint16_t) /* n_cigar_op */));

// QUAL. note: can only be called after sam_seg_CIGAR updates SEQ.len
if (!vb->qual_missing) // case we have both SEQ and QUAL
Expand Down
2 changes: 1 addition & 1 deletion src/codec_domq.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ CODEC_RECONSTRUCT (codec_domq_reconstruct)
START_TIMER;

if (!ctx->is_loaded && !(ctx+1)->is_loaded && !(ctx+2)->is_loaded && !(ctx+3)->is_loaded) return;

ContextP declare_domq_contexts (ctx);

// case: up to v13, all reads were compressed with the same dom (no multiplexing)
Expand Down
31 changes: 19 additions & 12 deletions src/conda/meta.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,21 @@ build:
requirements:
build:
- {{ posix }}make
- {{ posix }}filesystem # [win]
- {{ posix }}sed # [win]
- {{ posix }}coreutils # [win]
- {{ posix }}zip # [win]
- nasm # [not arm64]
- {{ compiler('c') }} # [not win]
- {{ compiler('cxx') }} # [not win]
- {{ compiler('c') }} # [unix]
- {{ compiler('cxx') }} # [unix]
- {{ stdlib("c") }} # [unix]
- {{ compiler('m2w64_c') }} # [win]
- {{ posix }}sed # [win]
- {{ posix }}coreutils # [win]

c_stdlib: # added per: https://github.com/conda-forge/conda-forge.github.io/issues/2102
- sysroot # [linux]
- macosx_deployment_target # [osx]
# - vs # [win]
- {{ compiler('m2w64_cxx') }} # [win]
- {{ stdlib("m2w64_c") }} # [win]

# c_stdlib: # added per: https://github.com/conda-forge/conda-forge.github.io/issues/2102
# - sysroot # [linux]
# - macosx_deployment_target # [osx]

c_stdlib_version: # [unix]
- 2.12 # [linux and x86_64]
Expand All @@ -46,9 +50,12 @@ requirements:
- 11.0 # [osx and arm64]

host:
- {{ native }}gcc-libs # [win]
# - {{ native }}gcc-libs # [win]
- pthreads-win32 # [win]

run:
- {{ native }}gcc-libs # [win]
# - {{ native }}gcc-libs # [win]
- pthreads-win32 # [win]
- curl

test:
Expand All @@ -64,7 +71,7 @@ about:
license_family: OTHER
license_file:
- LICENSE.txt
summary: Compressor for genomic files (FASTQ, BAM, VCF, FASTA and more), up to 5x better than gzip and faster too
summary: Lossless compression of FASTQ, BAM, VCF, FASTA - 2x-10x better than .gz / .cram
description: |
__README_MD__

Expand Down
121 changes: 121 additions & 0 deletions src/crc64.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@

/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
*
* Specification of this CRC64 variant follows:
* Name: crc-64-jones
* Width: 64 bits
* Poly: 0xad93d23594c935a9
* Reflected In: True
* Xor_In: 0xffffffffffffffff
* Reflected_Out: True
* Xor_Out: 0x0
* Check("123456789"): 0xe9c6d914c4b8d9ca
*
* Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. */

// Genozip comment:
// This file was obtained from https://raw.githubusercontent.com/srned/baselib/refs/heads/master/crc64.c and modified for Genozip purposes. Modifications are:
// Copyright (C) 2024-2024 Genozip Limited. Patent Pending. Please see terms and conditions in the file LICENSE.txt

#include "genozip.h"

// Table-driven CRC-64 (the variant described in this file's header comment), consuming the input one byte at a time.
//   crc      - running CRC value to start from; the whole state of the computation lives in this value,
//              so the return value of one call can be passed as the crc of the next call to continue over more data
//   data     - bytes to be folded into the CRC
//   data_len - number of bytes in data. If 0, data is not accessed and crc is returned unchanged.
// Returns the updated CRC value.
uint64_t crc64 (uint64_t crc, bytes data, uint64_t data_len)
{
    // one pre-computed 64-bit entry for each possible value of a byte
    static const uint64_t crc64_tab[256] = {
        0x0000000000000000, 0x7ad870c830358979, 0xf5b0e190606b12f2, 0x8f689158505e9b8b,
        0xc038e5739841b68f, 0xbae095bba8743ff6, 0x358804e3f82aa47d, 0x4f50742bc81f2d04,
        0xab28ecb46814fe75, 0xd1f09c7c5821770c, 0x5e980d24087fec87, 0x24407dec384a65fe,
        0x6b1009c7f05548fa, 0x11c8790fc060c183, 0x9ea0e857903e5a08, 0xe478989fa00bd371,
        0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8, 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a,
        0xbd301a4810ffd90e, 0xc7e86a8020ca5077, 0x4880fbd87094cbfc, 0x32588b1040a14285,
        0xd620138fe0aa91f4, 0xacf86347d09f188d, 0x2390f21f80c18306, 0x594882d7b0f40a7f,
        0x1618f6fc78eb277b, 0x6cc0863448deae02, 0xe3a8176c18803589, 0x997067a428b5bcf0,
        0xfa11fe77117cdf02, 0x80c98ebf2149567b, 0x0fa11fe77117cdf0, 0x75796f2f41224489,
        0x3a291b04893d698d, 0x40f16bccb908e0f4, 0xcf99fa94e9567b7f, 0xb5418a5cd963f206,
        0x513912c379682177, 0x2be1620b495da80e, 0xa489f35319033385, 0xde51839b2936bafc,
        0x9101f7b0e12997f8, 0xebd98778d11c1e81, 0x64b116208142850a, 0x1e6966e8b1770c73,
        0x8719014c99c2b083, 0xfdc17184a9f739fa, 0x72a9e0dcf9a9a271, 0x08719014c99c2b08,
        0x4721e43f0183060c, 0x3df994f731b68f75, 0xb29105af61e814fe, 0xc849756751dd9d87,
        0x2c31edf8f1d64ef6, 0x56e99d30c1e3c78f, 0xd9810c6891bd5c04, 0xa3597ca0a188d57d,
        0xec09088b6997f879, 0x96d1784359a27100, 0x19b9e91b09fcea8b, 0x636199d339c963f2,
        0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416, 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4,
        0x1f423fcee22f9be0, 0x659a4f06d21a1299, 0xeaf2de5e82448912, 0x902aae96b271006b,
        0x74523609127ad31a, 0x0e8a46c1224f5a63, 0x81e2d7997211c1e8, 0xfb3aa75142244891,
        0xb46ad37a8a3b6595, 0xceb2a3b2ba0eecec, 0x41da32eaea507767, 0x3b024222da65fe1e,
        0xa2722586f2d042ee, 0xd8aa554ec2e5cb97, 0x57c2c41692bb501c, 0x2d1ab4dea28ed965,
        0x624ac0f56a91f461, 0x1892b03d5aa47d18, 0x97fa21650afae693, 0xed2251ad3acf6fea,
        0x095ac9329ac4bc9b, 0x7382b9faaaf135e2, 0xfcea28a2faafae69, 0x8632586aca9a2710,
        0xc9622c4102850a14, 0xb3ba5c8932b0836d, 0x3cd2cdd162ee18e6, 0x460abd1952db919f,
        0x256b24ca6b12f26d, 0x5fb354025b277b14, 0xd0dbc55a0b79e09f, 0xaa03b5923b4c69e6,
        0xe553c1b9f35344e2, 0x9f8bb171c366cd9b, 0x10e3202993385610, 0x6a3b50e1a30ddf69,
        0x8e43c87e03060c18, 0xf49bb8b633338561, 0x7bf329ee636d1eea, 0x012b592653589793,
        0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee, 0xbbcbcc9dfb2ca865, 0xc113bc55cb19211c,
        0x5863dbf1e3ac9dec, 0x22bbab39d3991495, 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667,
        0x985b3e827bed2b63, 0xe2834e4a4bd8a21a, 0x6debdf121b863991, 0x1733afda2bb3b0e8,
        0xf34b37458bb86399, 0x8993478dbb8deae0, 0x06fbd6d5ebd3716b, 0x7c23a61ddbe6f812,
        0x3373d23613f9d516, 0x49aba2fe23cc5c6f, 0xc6c333a67392c7e4, 0xbc1b436e43a74e9d,
        0x95ac9329ac4bc9b5, 0xef74e3e19c7e40cc, 0x601c72b9cc20db47, 0x1ac40271fc15523e,
        0x5594765a340a7f3a, 0x2f4c0692043ff643, 0xa02497ca54616dc8, 0xdafce7026454e4b1,
        0x3e847f9dc45f37c0, 0x445c0f55f46abeb9, 0xcb349e0da4342532, 0xb1eceec59401ac4b,
        0xfebc9aee5c1e814f, 0x8464ea266c2b0836, 0x0b0c7b7e3c7593bd, 0x71d40bb60c401ac4,
        0xe8a46c1224f5a634, 0x927c1cda14c02f4d, 0x1d148d82449eb4c6, 0x67ccfd4a74ab3dbf,
        0x289c8961bcb410bb, 0x5244f9a98c8199c2, 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30,
        0x438c80a64ce15841, 0x3954f06e7cd4d138, 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca,
        0x83b465d5d4a0eece, 0xf96c151de49567b7, 0x76048445b4cbfc3c, 0x0cdcf48d84fe7545,
        0x6fbd6d5ebd3716b7, 0x15651d968d029fce, 0x9a0d8ccedd5c0445, 0xe0d5fc06ed698d3c,
        0xaf85882d2576a038, 0xd55df8e515432941, 0x5a3569bd451db2ca, 0x20ed197575283bb3,
        0xc49581ead523e8c2, 0xbe4df122e51661bb, 0x3125607ab548fa30, 0x4bfd10b2857d7349,
        0x04ad64994d625e4d, 0x7e7514517d57d734, 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6,
        0x12b5926535897936, 0x686de2ad05bcf04f, 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd,
        0xd28d7716adc8cfb9, 0xa85507de9dfd46c0, 0x273d9686cda3dd4b, 0x5de5e64efd965432,
        0xb99d7ed15d9d8743, 0xc3450e196da80e3a, 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8,
        0x79a59ba2c5dc31cc, 0x037deb6af5e9b8b5, 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47,
        0x4ad64994d625e4da, 0x300e395ce6106da3, 0xbf66a804b64ef628, 0xc5bed8cc867b7f51,
        0x8aeeace74e645255, 0xf036dc2f7e51db2c, 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de,
        0xe1fea520be311aaf, 0x9b26d5e88e0493d6, 0x144e44b0de5a085d, 0x6e963478ee6f8124,
        0x21c640532670ac20, 0x5b1e309b16452559, 0xd476a1c3461bbed2, 0xaeaed10b762e37ab,
        0x37deb6af5e9b8b5b, 0x4d06c6676eae0222, 0xc26e573f3ef099a9, 0xb8b627f70ec510d0,
        0xf7e653dcc6da3dd4, 0x8d3e2314f6efb4ad, 0x0256b24ca6b12f26, 0x788ec2849684a65f,
        0x9cf65a1b368f752e, 0xe62e2ad306bafc57, 0x6946bb8b56e467dc, 0x139ecb4366d1eea5,
        0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8, 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a,
        0xb0c7b7e3c7593bd8, 0xca1fc72bf76cb2a1, 0x45775673a732292a, 0x3faf26bb9707a053,
        0x70ff52905f188d57, 0x0a2722586f2d042e, 0x854fb3003f739fa5, 0xff97c3c80f4616dc,
        0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4, 0xee5fbac7cf26d75f, 0x9487ca0fff135e26,
        0xdbd7be24370c7322, 0xa10fceec0739fa5b, 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9,
        0xcdcf48d84fe75459, 0xb71738107fd2dd20, 0x387fa9482f8c46ab, 0x42a7d9801fb9cfd2,
        0x0df7adabd7a6e2d6, 0x772fdd63e7936baf, 0xf8474c3bb7cdf024, 0x829f3cf387f8795d,
        0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355, 0x935745fc4798b8de, 0xe98f353477ad31a7,
        0xa6df411fbfb21ca3, 0xdc0731d78f8795da, 0x536fa08fdfd90e51, 0x29b7d047efec8728,
    };

    uint64_t pos = 0; // index of the next input byte to consume

    while (pos < data_len) {
        // the low byte of the running crc, XORed with the input byte, selects the table entry;
        // the remaining (upper) bits of the running crc are shifted down by one byte and folded in
        crc = (crc >> 8) ^ crc64_tab[(uint8_t)crc ^ data[pos]];
        pos++;
    }

    return crc;
}

28 changes: 0 additions & 28 deletions src/deep.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

#define MAX_AUTO_READ_LEN 20000 // not too long, so that the "longer reads" code path also gets some mileage

rom by_names[2] = { "BY_SEQ", "BY_QNAME" };

// hash of a SEQ field in the forward direction
// note: I tested crc32 after converting seq to 2-bit. No advantage - Almost identical linked-list-length histogram.
uint32_t deep_seq_hash (VBlockP vb, STRp(seq), bool is_revcomp)
Expand Down Expand Up @@ -90,31 +88,5 @@ uint32_t deep_qual_hash (VBlockP vb, STRp(qual), bool is_revcomp)
if (is_revcomp && qual_len > MAX_AUTO_READ_LEN)
buf_free (vb->scratch);

// xxx possible better but slower hash for qual
// char *my_qual;

// // short enough reads - use automatic allocation
// if (qual_len <= MAX_AUTO_READ_LEN)
// my_qual = short_read_data;
// else {
// ASSERTNOTINUSE (vb->scratch);
// buf_alloc (vb, &vb->scratch, 0, qual_len, char, 0, "scratch");
// my_qual = B1STc (vb->scratch);
// }

// if (is_revcomp)
// str_reverse (my_qual, STRa(qual));

// for (uint32_t i=0; i < qual_len; i++)
// my_qual[i] ^= (i & 0xff);

// uint32_t hash = crc32 (0, my_qual, qual_len);

// // note: qual_hash=0 means "QUAL not hashed", so both crc32=0 and crc32=1 get mapped to hash=1
// if (hash == 0) hash = 1;

// if (qual_len > MAX_AUTO_READ_LEN)
// buf_free (vb->scratch);

return hash;
}
19 changes: 8 additions & 11 deletions src/deep.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,19 @@
#pragma once

#include "qname.h"

#define num_hash_bits z_file->deep_index_by[BY_SEQ].prm8[0] // ZIP: number of bits of seq_hash used for deep_hash_by_* (i.e. hash table is of size 2^num_hash_bits)
#define num_hash_bits z_file->deep_index.prm8[0] // ZIP: number of bits of seq_hash used for deep_hash_by_* (i.e. hash table is of size 2^num_hash_bits)

#define deephash_issame(a,b) (!memcmp (&(a), &(b), sizeof (DeepHash)))
#define DEEPHASHf(a) (a).qname, (a).seq, (a).qual // for printf-like arguments

// note: in PIZ, z_file->vb_start_deep_line is indexed by vb_idx - 0-based counter of non-DEPN VBs (unlike in ZIP which is vb_i)
#define num_deepable_sam_vbs z_file->deep_index.param
#define num_deepable_sam_vbs z_file->deep_index.prm32[0] // PIZ

// 32 bytes
typedef struct {
DeepHash hash; // hashes of qname, seq, qual
DeepHash hash; // hashes of qname (64b), seq, qual (32b)
#define NO_NEXT 0xffffffff
uint32_t next[2]; // two linked lists (BY_SEQ, BY_QNAME) of entries with the with the same hash(SEQ)&mask
uint32_t next; // linked list of entries with the same (hash.qname & mask)
uint32_t seq_len;

union ZipZDeepPlace {
Expand All @@ -43,9 +42,9 @@ extern uint32_t deep_seq_hash (VBlockP vb, STRp(seq), bool is_revcomp);
extern uint32_t deep_qual_hash (VBlockP vb, STRp(qual), bool is_revcomp);

// ZIP: hash of canonical qname (note: in FASTQ qname is the part of DESC up to the first whitespace)
static inline uint32_t deep_qname_hash (QType q, STRp(qname), thool is_last, uint32_t *uncanonical_suffix_len)
{
return qname_calc_hash (q, COMP_NONE, STRa(qname), is_last, true, uncanonical_suffix_len);
static inline uint64_t deep_qname_hash (QType q, STRp(qname), thool is_last, uint32_t *uncanonical_suffix_len)
{
return qname_calc_hash (q, COMP_NONE, STRa(qname), is_last, true, CRC64, uncanonical_suffix_len);
}

//-----------------------------------------------------------------------
Expand All @@ -56,7 +55,7 @@ static inline uint32_t deep_qname_hash (QType q, STRp(qname), thool is_last, uin
// 2: is_long_seq_comp
// 3: is_long_qual_comp
//
// Part B: exists unless segconf.deep_qtype==QNONE or --seq-only or --qual-only
// Part B: exists unless segconf.deep_qtype==QNONE (supported up to 15.0.66) or --seq-only or --qual-only
// 1 Byte : qname_len (up to 254 by BAM spec)
// qname_len bytes : QNAME (not compressed, not nul-terminated)
//
Expand Down Expand Up @@ -97,5 +96,3 @@ typedef struct { // 1 byte
uint8_t is_long_qual_comp : 1; // is qual_comp_len greater than 255
uint8_t unused : 3;
} PizZDeepFlags;

extern rom by_names[2];
Loading

0 comments on commit 30a6d33

Please sign in to comment.