Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

for creating indexing file in samtools merge #193

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion bgzf.c
Original file line number Diff line number Diff line change
Expand Up @@ -701,11 +701,18 @@ static int mt_flush_queue(BGZF *fp)
while (mt->proc_cnt < mt->n_threads);
// dump data to disk
for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode;
for (i = 0; i < mt->curr; ++i)
for (i = 0; i < mt->curr; ++i) {
if (hwrite(fp->fp, mt->blk[i], mt->len[i]) != mt->len[i]) {
fp->errcode |= BGZF_ERR_IO;
break;
}

if(fp->close == 1 && fp->address_count < fp->address_capacity) {
fp->address[fp->address_count] = htell(fp->fp);
fp->address_count++;
}
}

mt->curr = 0;
return (fp->errcode == 0)? 0 : -1;
}
Expand Down Expand Up @@ -756,6 +763,12 @@ int bgzf_flush(BGZF *fp)
}
fp->block_address += block_length;
}

if(fp->close == 1 && fp->address_count < fp->address_capacity) {
fp->address[fp->address_count] = htell(fp->fp);
fp->address_count++;
}

return 0;
}

Expand Down Expand Up @@ -795,6 +808,8 @@ ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length)

int bgzf_close(BGZF* fp)
{
fp->close = 0;

int ret, block_length;
if (fp == 0) return -1;
if (fp->is_write && fp->is_compressed) {
Expand Down
74 changes: 69 additions & 5 deletions hts.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,8 @@ hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_l
idx->z.save_bin = idx->z.save_tid = idx->z.last_tid = idx->z.last_bin = 0xffffffffu;
idx->z.save_off = idx->z.last_off = idx->z.off_beg = idx->z.off_end = offset0;
idx->z.last_coor = 0xffffffffu;
idx->n_no_coor = idx->z.n_mapped = idx->z.n_unmapped = idx->z.off_end = 0;

if (n) {
idx->n = idx->m = n;
idx->bidx = (bidx_t**)calloc(n, sizeof(bidx_t*));
Expand Down Expand Up @@ -1016,6 +1018,62 @@ int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int
return 0;
}

void hts_idx_replace_address(const hts_idx_t *idx, const int no_address_cushion, const int64_t no_address_cushion_value, const unsigned int address_capacity, const int64_t *address)
{
int i, j, address_index, block_offset;
khint_t k;
for (i = 0; i < idx->n; ++i) {
bidx_t *bidx = idx->bidx[i];
lidx_t *lidx = idx->lidx + i;
if(bidx) {
for (k = kh_begin(bidx); k != kh_end(bidx); ++k) {
if (kh_exist(bidx, k)) {
bins_t *p = &kh_value(bidx, k);
for (j = 0; j < p->n; ++j) {
if(p->list[j].u >= no_address_cushion_value) {
block_offset = p->list[j].u & 0xFFFF;
address_index = (p->list[j].u >> 16) - no_address_cushion;
if(address_index >= 0) {
if(address_index >= address_capacity) {
fprintf(stderr, "[W::%s] uaddress_index = %d, is over address_capacity = %d\n", __func__, address_index, address_capacity);
address_index = address_capacity - 1;
}
p->list[j].u = ((address[address_index] << 16) | block_offset);
}
}

if(p->list[j].v >= no_address_cushion_value) {
block_offset = p->list[j].v & 0xFFFF;
address_index = (p->list[j].v >> 16) - no_address_cushion;
if(address_index >= 0) {
if(address_index >= address_capacity) {
fprintf(stderr, "[W::%s] vaddress_index = %d, is over address_capacity = %d\n", __func__, address_index, address_capacity);
address_index = address_capacity - 1;
}
p->list[j].v = ((address[address_index] << 16) | block_offset);
}
}
} // ~for(j)
} // ~if(kh_exist)
} // ~for(k)
}

for (j = 0; j < lidx->n; ++j) {
if(lidx->offset[j] >= no_address_cushion_value) {
block_offset = lidx->offset[j] & 0xFFFF;
address_index = (lidx->offset[j] >> 16) - no_address_cushion;
if(address_index >= 0) {
if(address_index >= address_capacity) {
fprintf(stderr, "[W::%s] oaddress_index = %d, is over address_capacity = %d\n", __func__, address_index, address_capacity);
address_index = address_capacity - 1;
}
lidx->offset[j] = ((address[address_index] << 16) | block_offset);
}
}
} // ~for(j)
} // ~for(i)
}

void hts_idx_destroy(hts_idx_t *idx)
{
khint_t k;
Expand Down Expand Up @@ -1149,11 +1207,17 @@ void hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt)
hts_idx_save_core(idx, fp, HTS_FMT_TBI);
bgzf_close(fp);
} else if (fmt == HTS_FMT_BAI) {
FILE *fp;
fp = fopen(strcat(fnidx, ".bai"), "w");
fwrite("BAI\1", 1, 4, fp);
hts_idx_save_core(idx, fp, HTS_FMT_BAI);
fclose(fp);
if(strcmp("/dev/stdout", fnidx) == 0) {
fwrite("BAI\1", 1, 4, stdout);
hts_idx_save_core(idx, stdout, HTS_FMT_BAI);
}
else {
FILE *fp;
fp = fopen(strcat(fnidx, ".bai"), "w");
fwrite("BAI\1", 1, 4, fp);
hts_idx_save_core(idx, fp, HTS_FMT_BAI);
fclose(fp);
}
} else abort();
free(fnidx);
}
Expand Down
3 changes: 3 additions & 0 deletions htslib/bgzf.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ struct BGZF {
bgzidx_t *idx; // BGZF index
int idx_build_otf; // build index on the fly, set by bgzf_index_build_init()
z_stream *gz_stream;// for gzip-compressed files
unsigned int address_count, address_capacity;
int64_t *address;
int close;
};
#ifndef HTS_BGZF_TYPEDEF
typedef struct BGZF BGZF;
Expand Down
2 changes: 2 additions & 0 deletions htslib/hts.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,8 @@ typedef struct {
int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped);
void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);

void hts_idx_replace_address(const hts_idx_t *idx, const int no_address_cushion, const int64_t no_address_cushion_value, const unsigned int address_capacity, const int64_t *address);

void hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt);
hts_idx_t *hts_idx_load(const char *fn, int fmt);

Expand Down