From 18e4e1f4df187de6093b7038597536631281e19a Mon Sep 17 00:00:00 2001
From: James Bonfield
Date: Thu, 31 Oct 2024 15:15:35 +0000
Subject: [PATCH] Fix threaded sam_read1 after EOF.

The SAM sam_dispatcher_read decodes blocks of SAM records into blocks
of BAM records. As this is (hopefully) reading ahead of the sam_read1
consumer code, when it hits EOF it adds a final NULL block as a
sentinel. This works well and it forces sam_read1 to return EOF too.

However, if we ignore that and call sam_read1 again, it's consumed our
sentinel block and it gets stuck waiting for the next block of BAM
records. We now cache the EOF status and check first.

Note this doesn't impact on iterators as they work at a different
level already and it's the iterator itself which tracks EOF.

Fixes #1855
---
 sam.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/sam.c b/sam.c
index 3593b5507..ce5833bf7 100644
--- a/sam.c
+++ b/sam.c
@@ -3216,6 +3216,7 @@ enum sam_cmd {
     SAM_NONE = 0,
     SAM_CLOSE,
     SAM_CLOSE_DONE,
+    SAM_AT_EOF,
 };
 
 typedef struct SAM_state {
@@ -3651,6 +3652,7 @@ static void *sam_dispatcher_read(void *vp) {
         pthread_mutex_unlock(&fd->command_m);
     }
 
+    // Submit a NULL sp_bams entry to act as an EOF marker
     if (hts_tpool_dispatch(fd->p, fd->q, sam_parse_eof, NULL) < 0)
         goto err;
 
@@ -4308,14 +4310,25 @@ static inline int sam_read1_sam(htsFile *fp, sam_hdr_t *h, bam1_t *b) {
                 errno = fd->errcode;
                 return -2;
             }
+
+            pthread_mutex_lock(&fd->command_m);
+            int cmd = fd->command;
+            pthread_mutex_unlock(&fd->command_m);
+            if (cmd == SAM_AT_EOF)
+                return -1;
+
             hts_tpool_result *r = hts_tpool_next_result_wait(fd->q);
             if (!r)
                 return -2;
             fd->curr_bam = gb = (sp_bams *)hts_tpool_result_data(r);
             hts_tpool_delete_result(r, 0);
         }
-        if (!gb)
+        if (!gb) {
+            pthread_mutex_lock(&fd->command_m);
+            fd->command = SAM_AT_EOF;
+            pthread_mutex_unlock(&fd->command_m);
             return fd->errcode ? -2 : -1;
+        }
         bam1_t *b_array = (bam1_t *)gb->bams;
         if (fd->curr_idx < gb->nbams)
             if (!bam_copy1(b, &b_array[fd->curr_idx++]))