From 1a20c4ef3b2b14f27238f91e5bcae3436b058646 Mon Sep 17 00:00:00 2001 From: Dmitriy Musatkin <63878209+DmitriyMusatkin@users.noreply.github.com> Date: Fri, 13 Sep 2024 13:36:21 -0700 Subject: [PATCH] Bind out crc64 (#597) --- awscrt/checksums.py | 9 +++++++ crt/aws-c-common | 2 +- crt/aws-checksums | 2 +- source/checksums.h | 1 + source/crc.c | 59 +++++++++++++++++++++++++++++++++--------- source/module.c | 1 + test/test_checksums.py | 42 ++++++++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 14 deletions(-) diff --git a/awscrt/checksums.py b/awscrt/checksums.py index 2ea6a3f64..06a0005b1 100644 --- a/awscrt/checksums.py +++ b/awscrt/checksums.py @@ -21,3 +21,12 @@ def crc32c(input: bytes, previous_crc32c: int = 0) -> int: Returns an unsigned 32-bit integer. """ return _awscrt.checksums_crc32c(input, previous_crc32c) + + +def crc64nvme(input: bytes, previous_crc64nvme: int = 0) -> int: + """ + Perform a CRC64 NVME computation. + If continuing to update a running CRC, pass its value into `previous_crc64nvme`. + Returns an unsigned 64-bit integer. 
+ """ + return _awscrt.checksums_crc64nvme(input, previous_crc64nvme) diff --git a/crt/aws-c-common b/crt/aws-c-common index 672cc0032..b9959f592 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 672cc0032eb28d69fbdd22c9463253c89d7a6f30 +Subproject commit b9959f5922a4b969beab8f0b99aa0b34bc9ee55c diff --git a/crt/aws-checksums b/crt/aws-checksums index aac442a2d..ce04ab00b 160000 --- a/crt/aws-checksums +++ b/crt/aws-checksums @@ -1 +1 @@ -Subproject commit aac442a2dbbb5e72d0a3eca8313cf65e7e1cac2f +Subproject commit ce04ab00b3ecc41912f478bfedca39f8e1919d6b diff --git a/source/checksums.h b/source/checksums.h index 9ee27297d..824d743b2 100644 --- a/source/checksums.h +++ b/source/checksums.h @@ -8,5 +8,6 @@ PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args); PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args); +PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args); #endif /* AWS_CRT_PYTHON_CHECKSUMS_H */ diff --git a/source/crc.c b/source/crc.c index 67eee879e..b02fc4759 100644 --- a/source/crc.c +++ b/source/crc.c @@ -7,7 +7,7 @@ #include "aws/checksums/crc.h" #include "aws/common/byte_buf.h" -PyObject *checksums_crc_common(PyObject *args, uint32_t (*checksum_fn)(const uint8_t *, int, uint32_t)) { +PyObject *checksums_crc32_common(PyObject *args, uint32_t (*checksum_fn)(const uint8_t *, size_t, uint32_t)) { Py_buffer input; PyObject *py_previousCrc; PyObject *py_result = NULL; @@ -39,18 +39,11 @@ PyObject *checksums_crc_common(PyObject *args, uint32_t (*checksum_fn)(const uin /* clang-format off */ Py_BEGIN_ALLOW_THREADS - /* Avoid truncation of length for very large buffers. crc() takes - length as an int, which may be narrower than Py_ssize_t. 
*/ - while ((size_t)len > INT_MAX) { - val = checksum_fn(buf, INT_MAX, val); - buf += (size_t)INT_MAX; - len -= (size_t)INT_MAX; - } - val = checksum_fn(buf, (int)len, val); + val = checksum_fn(buf, (size_t)len, val); Py_END_ALLOW_THREADS /* clang-format on */ } else { - val = checksum_fn(input.buf, (int)input.len, val); + val = checksum_fn(input.buf, (size_t)input.len, val); } py_result = PyLong_FromUnsignedLong(val); done: @@ -62,10 +55,52 @@ PyObject *checksums_crc_common(PyObject *args, uint32_t (*checksum_fn)(const uin PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args) { (void)self; - return checksums_crc_common(args, aws_checksums_crc32); + return checksums_crc32_common(args, aws_checksums_crc32_ex); } PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args) { (void)self; - return checksums_crc_common(args, aws_checksums_crc32c); + return checksums_crc32_common(args, aws_checksums_crc32c_ex); +} + +PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args) { + (void)self; + Py_buffer input; + PyObject *py_previousCrc64; + PyObject *py_result = NULL; + + if (!PyArg_ParseTuple(args, "s*O", &input, &py_previousCrc64)) { + return NULL; + } + + /* Note: PyArg_ParseTuple() doesn't do overflow checking on unsigned values + * so use PyLong_AsUnsignedLongLong() to get the value of the previousCrc arg */ + uint64_t previousCrc = PyLong_AsUnsignedLongLong(py_previousCrc64); + + if (previousCrc == (uint64_t)-1 && PyErr_Occurred()) { + goto done; + } + + if (!PyBuffer_IsContiguous(&input, 'C')) { + PyErr_SetString(PyExc_ValueError, "input must be contiguous buffer"); + goto done; + } + + /* Releasing the GIL for very small buffers is inefficient + and may lower performance */ + if (input.len > 1024 * 5) { + /* clang-format off */ + Py_BEGIN_ALLOW_THREADS + previousCrc = aws_checksums_crc64nvme_ex(input.buf, (size_t)input.len, previousCrc); + Py_END_ALLOW_THREADS + /* clang-format on */ + } else { + previousCrc = 
aws_checksums_crc64nvme_ex(input.buf, (size_t)input.len, previousCrc); + } + py_result = PyLong_FromUnsignedLongLong(previousCrc); +done: + if (input.obj) { + PyBuffer_Release(&input); + } + return py_result; } diff --git a/source/module.c b/source/module.c index 983054e79..4230ab6bf 100644 --- a/source/module.c +++ b/source/module.c @@ -730,6 +730,7 @@ static PyMethodDef s_module_methods[] = { /* Checksum primitives */ AWS_PY_METHOD_DEF(checksums_crc32, METH_VARARGS), AWS_PY_METHOD_DEF(checksums_crc32c, METH_VARARGS), + AWS_PY_METHOD_DEF(checksums_crc64nvme, METH_VARARGS), /* HTTP */ AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS), diff --git a/test/test_checksums.py b/test/test_checksums.py index 0f4e982d7..5d46581b8 100644 --- a/test/test_checksums.py +++ b/test/test_checksums.py @@ -95,6 +95,48 @@ def test_crc32c_huge_buffer(self): val = checksums.crc32c(huge_buffer) self.assertEqual(0x572a7c8a, val) + def test_crc64nvme_zeros_one_shot(self): + output = checksums.crc64nvme(bytes(32)) + expected = 0xcf3473434d4ecf3b + self.assertEqual(expected, output) + + def test_crc64nvme_zeros_iterated(self): + output = 0 + for i in range(32): + output = checksums.crc64nvme(bytes(1), output) + expected = 0xcf3473434d4ecf3b + self.assertEqual(expected, output) + + def test_crc64nvme_values_one_shot(self): + output = checksums.crc64nvme(''.join(chr(i) for i in range(32))) + expected = 0xb9d9d4a8492cbd7f + self.assertEqual(expected, output) + + def test_crc64nvme_values_iterated(self): + output = 0 + for i in range(32): + output = checksums.crc64nvme(chr(i), output) + expected = 0xb9d9d4a8492cbd7f + self.assertEqual(expected, output) + + def test_crc64nvme_large_buffer(self): + # stress test gil optimization for 32 bit architecture which cannot handle huge buffer + large_buffer = bytes(25 * 2**20) + val = checksums.crc64nvme(large_buffer) + self.assertEqual(0x5b6f5045463ca45e, val) + + def test_crc64nvme_huge_buffer(self): + if sys.platform.startswith('freebsd'): + # 
Skip this test on FreeBSD, as it simply crashes instead of raising an exception in this case + raise unittest.SkipTest('Skip this test for freebsd') + try: + INT_MAX = 2**32 - 1 + huge_buffer = bytes(INT_MAX + 5) + except BaseException: + raise unittest.SkipTest('Machine cant allocate giant buffer for giant buffer test') + val = checksums.crc64nvme(huge_buffer) + self.assertEqual(0x2645c28052b1fbb0, val) + if __name__ == '__main__': unittest.main()