Skip to content

Commit

Permalink
Stricter limit on POS/MPOS/TLEN in sam_parse1()
Browse files Browse the repository at this point in the history
Help avoid overflow on arithmetic involving POS, MPOS and TLEN
by limiting values in the SAM parser to fit in 62 bits (or 63
for TLEN as it's signed).  The new limit is still massively bigger
than any known reference so it should not cause any problems
in practice.

Credit to OSS-Fuzz
Fixes oss-fuzz 68750
  • Loading branch information
daviesrob committed Jul 29, 2024
1 parent fbe5ff6 commit 9a4b660
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
7 changes: 4 additions & 3 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -2947,7 +2947,7 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b)
} else c->tid = -1;

// pos
c->pos = hts_str2uint(p, &p, 63, &overflow) - 1;
c->pos = hts_str2uint(p, &p, 62, &overflow) - 1;
if (*p++ != '\t') goto err_ret;
if (c->pos < 0 && c->tid >= 0) {
_parse_warn(1, "mapped query cannot have zero coordinate; treated as unmapped");
Expand Down Expand Up @@ -2990,15 +2990,16 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b)
_parse_warn(c->mtid < 0, "unrecognized mate reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX));
}
// mpos
c->mpos = hts_str2uint(p, &p, 63, &overflow) - 1;
c->mpos = hts_str2uint(p, &p, 62, &overflow) - 1;
if (*p++ != '\t') goto err_ret;
if (c->mpos < 0 && c->mtid >= 0) {
_parse_warn(1, "mapped mate cannot have zero coordinate; treated as unmapped");
c->mtid = -1;
}
// tlen
c->isize = hts_str2int(p, &p, 64, &overflow);
c->isize = hts_str2int(p, &p, 63, &overflow);
if (*p++ != '\t') goto err_ret;
_parse_err(overflow, "number outside allowed range");
// seq
q = _read_token(p);
if (strcmp(q, "*")) {
Expand Down
18 changes: 9 additions & 9 deletions test/sam.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* test/sam.c -- SAM/BAM/CRAM API test cases.
Copyright (C) 2014-2020, 2022-2023 Genome Research Ltd.
Copyright (C) 2014-2020, 2022-2024 Genome Research Ltd.
Author: John Marshall <[email protected]>
Expand Down Expand Up @@ -1408,16 +1408,16 @@ static void check_big_ref(int parse_header)
"@HD\tVN:1.4\n"
"@SQ\tSN:large#1\tLN:5000000000\n"
"@SQ\tSN:small#1\tLN:100\n"
"@SQ\tSN:large#2\tLN:9223372034707292158\n"
"@SQ\tSN:large#2\tLN:4611686018427387904\n"
"@SQ\tSN:small#2\tLN:1\n"
"r1\t0\tlarge#1\t4999999000\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n"
"r2\t0\tsmall#1\t1\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n"
"r3\t0\tlarge#2\t9223372034707292000\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n"
"p1\t99\tlarge#2\t1\t50\t8M\t=\t9223372034707292150\t9223372034707292158\tACGTACGT\tabcdefgh\n"
"p1\t147\tlarge#2\t9223372034707292150\t50\t8M\t=\t1\t-9223372034707292158\tACGTACGT\tabcdefgh\n"
"r3\t0\tlarge#2\t4611686018427387000\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n"
"p1\t99\tlarge#2\t1\t50\t8M\t=\t4611686018427387895\t4611686018427387903\tACGTACGT\tabcdefgh\n"
"p1\t147\tlarge#2\t4611686018427387895\t50\t8M\t=\t1\t-4611686018427387903\tACGTACGT\tabcdefgh\n"
"r4\t0\tsmall#2\t2\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n";
const hts_pos_t expected_lengths[] = {
5000000000LL, 100LL, 9223372034707292158LL, 1LL
5000000000LL, 100LL, 4611686018427387904LL, 1LL
};
const int expected_tids[] = {
0, 1, 2, 2, 2, 3
Expand All @@ -1426,11 +1426,11 @@ static void check_big_ref(int parse_header)
-1, -1, -1, 2, 2, -1
};
const hts_pos_t expected_positions[] = {
4999999000LL - 1, 1LL - 1, 9223372034707292000LL - 1, 1LL - 1,
9223372034707292150LL - 1, 2LL - 1
4999999000LL - 1, 1LL - 1, 4611686018427387000LL - 1, 1LL - 1,
4611686018427387895LL - 1, 2LL - 1
};
const hts_pos_t expected_mpos[] = {
-1, -1, -1, 9223372034707292150LL - 1, 1LL - 1, -1
-1, -1, -1, 4611686018427387895LL - 1, 1LL - 1, -1
};
samFile *in = NULL, *out = NULL;
sam_hdr_t *header = NULL, *dup_header = NULL;
Expand Down

0 comments on commit 9a4b660

Please sign in to comment.