diff --git a/htscodecs/arith_dynamic.c b/htscodecs/arith_dynamic.c index 37aca77..90d9d73 100644 --- a/htscodecs/arith_dynamic.c +++ b/htscodecs/arith_dynamic.c @@ -733,15 +733,17 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, unsigned int c_meta_len; uint8_t *rle = NULL, *packed = NULL; - if (in_size > INT_MAX) { + if (in_size > INT_MAX || (out && *out_size == 0)) { *out_size = 0; return NULL; } if (!out) { *out_size = arith_compress_bound(in_size, order); - if (!(out = malloc(*out_size))) + if (!(out = malloc(*out_size))) { + *out_size = 0; return NULL; + } } unsigned char *out_end = out + *out_size; @@ -751,24 +753,30 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, if (order & X_CAT) { out[0] = X_CAT; c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size); + if (c_meta_len + in_size > *out_size) { + *out_size = 0; + return NULL; + } memcpy(out+c_meta_len, in, in_size); *out_size = in_size+c_meta_len; } if (order & X_STRIPE) { - int N = (order>>8); + int N = (order>>8) & 0xff; if (N == 0) N = 4; // default for compatibility with old tests - if (N > 255) - return NULL; + if (N > in_size) + N = in_size; unsigned char *transposed = malloc(in_size); unsigned int part_len[256]; unsigned int idx[256]; - if (!transposed) + if (!transposed) { + *out_size = 0; return NULL; - int i, j, x; + } + int i, j, x; for (i = 0; i < N; i++) { part_len[i] = in_size / N + ((in_size % N) > i); idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index @@ -788,6 +796,12 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, c_meta_len = 1; *out = order & ~X_NOSZ; c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); + if (c_meta_len >= *out_size) { + free(transposed); + *out_size = 0; + return NULL; + } + out[c_meta_len++] = N; out2_start = out2 = out+7+5*N; // shares a buffer with c_meta @@ -795,6 +809,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // Brute force try all methods. // FIXME: optimise this bit. Maybe learn over time? int j, best_j = 0, best_sz = INT_MAX; + uint8_t *r; // Works OK with read names. The first byte is the most important, // as it has most variability (little-endian). After that it's @@ -843,24 +858,37 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // {1, 128}}; for (j = 1; j <= m[MIN(i,3)][0]; j++) { + if (out2 - out > *out_size) + continue; // an error, but caught in best_sz check later + olen2 = *out_size - (out2 - out); //fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]); if ((order&3) == 0 && (m[MIN(i,3)][j]&1)) continue; - arith_compress_to(transposed+idx[i], part_len[i], - out2, &olen2, m[MIN(i,3)][j] | X_NOSZ); - if (best_sz > olen2) { + r = arith_compress_to(transposed+idx[i], part_len[i], + out2, &olen2, m[MIN(i,3)][j] | X_NOSZ); + if (r && olen2 && best_sz > olen2) { best_sz = olen2; best_j = j; } } -// if (best_j == 0) // none desireable -// return NULL; + + if (best_sz == INT_MAX) { + free(transposed); + *out_size = 0; + return NULL; + } if (best_j != j-1) { olen2 = *out_size - (out2 - out); - arith_compress_to(transposed+idx[i], part_len[i], - out2, &olen2, m[MIN(i,3)][best_j] | X_NOSZ); + r = arith_compress_to(transposed+idx[i], part_len[i], + out2, &olen2, + m[MIN(i,3)][best_j] | X_NOSZ); + if (!r) { + free(transposed); + *out_size = 0; + return NULL; + } } out2 += olen2; c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2); @@ -893,6 +921,10 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // PACK 2, 4 or 8 symbols into one byte. int pmeta_len; uint64_t packed_len; + if (c_meta_len + 256 > *out_size) { + *out_size = 0; + return NULL; + } packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); if (!packed) { out[0] &= ~X_PACK; @@ -934,6 +966,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, #else fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n"); free(out); + *out_size = 0; return NULL; #endif @@ -945,25 +978,40 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // *out_size = lzma_size; } else { + uint8_t *r; if (do_rle) { if (order == 0) - arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size); else - arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size); } else { //if (order == 2) // arith_compress_O2(in, in_size, out+c_meta_len, out_size); //else if (order == 1) - arith_compress_O1(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O1(in, in_size, out+c_meta_len, out_size); else - arith_compress_O0(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O0(in, in_size, out+c_meta_len, out_size); + } + + if (!r) { + free(rle); + free(packed); + *out_size = 0; + return NULL; } } if (*out_size >= in_size) { out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK out[0] |= X_CAT | no_size; + + if (out + c_meta_len + in_size > out_end) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } memcpy(out+c_meta_len, in, in_size); *out_size = in_size; } diff --git a/htscodecs/rANS_static4x16pr.c b/htscodecs/rANS_static4x16pr.c index 8c9a64a..fa16187 100644 --- a/htscodecs/rANS_static4x16pr.c +++ b/htscodecs/rANS_static4x16pr.c @@ -1164,7 +1164,7 @@ void rans_set_cpu(int opts) { unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, unsigned char *out,unsigned int *out_size, int order) { - if (in_size > INT_MAX) { + if (in_size > INT_MAX || (out && *out_size == 0)) { *out_size = 0; return NULL; } @@ -1177,8 +1177,10 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, *out_size = rans_compress_bound_4x16(in_size, order); if (*out_size == 0) return NULL; - if (!(out_free = out = malloc(*out_size))) + if (!(out_free = out = malloc(*out_size))) { + *out_size = 0; return NULL; + } } unsigned char *out_end = out + *out_size; @@ -1199,11 +1201,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, int N = (order>>8) & 0xff; if (N == 0) N = 4; // default for compatibility with old tests + if (N > in_size) + N = in_size; + unsigned char *transposed = malloc(in_size); unsigned int part_len[256]; unsigned int idx[256]; if (!transposed) { free(out_free); + *out_size = 0; return NULL; } int i, j, x; @@ -1241,6 +1247,13 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, c_meta_len = 1; *out = order & ~RANS_ORDER_NOSZ; c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); + if (c_meta_len >= *out_size) { + free(out_free); + free(transposed); + *out_size = 0; + return NULL; + } + out[c_meta_len++] = N; unsigned char *out_best = NULL; @@ -1249,7 +1262,8 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out2_start = out2 = out+7+5*N; // shares a buffer with c_meta for (i = 0; i < N; i++) { // Brute force try all methods. - int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10; + uint8_t *r; + int j, m[] = {1,64,128,0}, best_j = 0, best_sz = INT_MAX; for (j = 0; j < sizeof(m)/sizeof(*m); j++) { if ((order & m[j]) != m[j]) continue; @@ -1257,18 +1271,24 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // order-1 *only*; bit check above cannot elide order-0 if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0) continue; + + if (out2 - out > *out_size) + continue; // an error, but caught in best_sz check later + olen2 = *out_size - (out2 - out); - rans_compress_to_4x16(transposed+idx[i], part_len[i], - out2, &olen2, - m[j] | RANS_ORDER_NOSZ - | (order&RANS_ORDER_X32)); - if (best_sz > olen2) { + r = rans_compress_to_4x16(transposed+idx[i], part_len[i], + out2, &olen2, + m[j] | RANS_ORDER_NOSZ + | (order&RANS_ORDER_X32)); + if (r && olen2 && best_sz > olen2) { best_sz = olen2; best_j = j; if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) { unsigned char *tmp = realloc(out_best, olen2); if (!tmp) { free(out_free); + free(transposed); + *out_size = 0; return NULL; } out_best = tmp; @@ -1279,6 +1299,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, memcpy(out_best, out2, olen2); } } + + if (best_sz == INT_MAX) { + free(out_best); + free(out_free); + free(transposed); + *out_size = 0; + return NULL; + } + if (best_j < sizeof(m)/sizeof(*m)) { // Copy the best compression to output buffer if not current memcpy(out2, out_best, best_sz); @@ -1301,6 +1330,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out[0] = RANS_ORDER_CAT; c_meta_len = 1; c_meta_len += var_put_u32(&out[1], out_end, in_size); + + if (c_meta_len + in_size > *out_size) { + *out_size = 0; + return NULL; + } if (in_size) memcpy(out+c_meta_len, in, in_size); *out_size = c_meta_len + in_size; @@ -1329,6 +1363,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // PACK 2, 4 or 8 symbols into one byte. int pmeta_len; uint64_t packed_len; + if (c_meta_len + 256 > *out_size) { + free(out_free); + *out_size = 0; + return NULL; + } packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); if (!packed) { out[0] &= ~RANS_ORDER_PACK; @@ -1357,6 +1396,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, c_rmeta_len = in_size+257; if (!(meta = malloc(c_rmeta_len))) { free(out_free); + *out_size = 0; return NULL; } @@ -1380,8 +1420,23 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // Compress lengths with O0 and literals with O0/O1 ("order" param) int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2; sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len); + if ((c_meta_len+sz+5) > *out_size) { + free(out_free); + free(rle); + free(meta); + free(packed); + *out_size = 0; + return NULL; + } c_rmeta_len = *out_size - (c_meta_len+sz+5); - rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len); + if (!rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len)) { + free(out_free); + free(rle); + free(meta); + free(packed); + *out_size = 0; + return NULL; + } if (c_rmeta_len < rmeta_len) { sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len); memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len); @@ -1404,17 +1459,36 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out[0] &= ~RANS_ORDER_RLE; } + if (c_meta_len > *out_size) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } + *out_size -= c_meta_len; if (order && in_size < 8) { out[0] &= ~1; order &= ~1; } - rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size); + if (!rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size)) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } if (*out_size >= in_size) { out[0] &= ~3; out[0] |= RANS_ORDER_CAT | no_size; + + if (out + c_meta_len + in_size > out_end) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } if (in_size) memcpy(out+c_meta_len, in, in_size); *out_size = in_size; diff --git a/htscodecs/tokenise_name3.c b/htscodecs/tokenise_name3.c index 7493579..08e6261 100644 --- a/htscodecs/tokenise_name3.c +++ b/htscodecs/tokenise_name3.c @@ -1555,6 +1555,7 @@ uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level, use_arith, out, &out_len) < 0) { free_context(ctx); + free(out); return NULL; }