From a728b23f3b1809f8df3d91aae0d308256b917eb3 Mon Sep 17 00:00:00 2001
From: quic-shanagra
Date: Wed, 18 Sep 2024 15:57:14 +0530
Subject: [PATCH] Enabling fp16 execution

---
 src/Utils/DataUtil.cpp | 171 +++++++++++++++++++++++++++++++++++++++++
 src/Utils/DataUtil.hpp |  20 +++++
 src/Utils/IOTensor.cpp |  24 ++++++
 3 files changed, 215 insertions(+)

diff --git a/src/Utils/DataUtil.cpp b/src/Utils/DataUtil.cpp
index 5064c9d..d3b35e2 100644
--- a/src/Utils/DataUtil.cpp
+++ b/src/Utils/DataUtil.cpp
@@ -276,6 +276,177 @@ datautil::StatusCode datautil::writeBinaryToFile(std::string fileDir,
   return StatusCode::SUCCESS;
 }
 
+
+/*
+ * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to a
+ * 32-bit floating-point number in IEEE single-precision format.
+ */
+float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
+  // Put the 16 input bits in the upper half of a 32-bit word.
+  const uint32_t w = static_cast<uint32_t>(h) << 16;
+  const uint32_t sign = w & UINT32_C(0x80000000);
+  // w + w shifts the sign bit out of the word.
+  const uint32_t two_w = w + w;
+  const uint32_t exp_offset = UINT32_C(0xE0) << 23;
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+  const float exp_scale = 0x1.0p-112f;
+#else
+  const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
+#endif
+  const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
+  const uint32_t magic_mask = UINT32_C(126) << 23;
+  const float magic_bias = 0.5f;
+  const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+  // Inputs whose doubled word is below the cutoff use the denormalized result.
+  const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+  const uint32_t result = sign |
+      (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
+  return fp32_from_bits(result);
+}
+
+/*
+ * Widen numElements values read from `in` to 32-bit floats stored in `out`.
+ *
+ * bitWidth selects the input element format: 16 converts IEEE half-precision values, 32 copies the
+ * floats unchanged. Returns false for a null buffer, for numElements == 0, for any other bitWidth,
+ * and for bitWidth 16 when __hexagon__ is defined.
+ */
+bool datautil::floatNToFloat32(float* out, uint8_t* in, size_t numElements, uint8_t bitWidth) {
+  if (out == nullptr || in == nullptr || numElements == 0) {
+    return false;
+  }
+
+  if (bitWidth == 16) {
+#ifndef __hexagon__
+    const uint16_t* halfIn = reinterpret_cast<const uint16_t*>(in);
+    for (size_t i = 0; i < numElements; i++) {
+      out[i] = fp16_ieee_to_fp32_value(halfIn[i]);
+    }
+#else
+    return false;
+#endif  //__hexagon__
+  } else if (bitWidth == 32) {
+    const float* inFloat = reinterpret_cast<const float*>(in);
+    for (size_t i = 0; i < numElements; i++) {
+      out[i] = inFloat[i];
+    }
+  } else {
+    return false;
+  }
+
+  return true;
+}
+
+/*
+ * Reinterpret a 32-bit pattern as a float.
+ *
+ * NOTE(review): the fallback branch reads a union member other than the one last written, which ISO
+ * C++ leaves undefined; the compilers this file targets presumably accept it -- confirm.
+ */
+float datautil::fp32_from_bits(uint32_t w) {
+#if defined(__OPENCL_VERSION__)
+  return as_float(w);
+#elif defined(__CUDA_ARCH__)
+  return __uint_as_float((unsigned int) w);
+#elif defined(__INTEL_COMPILER)
+  return _castu32_f32(w);
+#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+  return _CopyFloatFromInt32((__int32) w);
+#else
+  union {
+    uint32_t as_bits;
+    float as_value;
+  } fp32 = {w};
+  return fp32.as_value;
+#endif
+}
+
+/* Return the 32-bit pattern of a float; the inverse of fp32_from_bits. */
+uint32_t datautil::fp32_to_bits(float f) {
+#if defined(__OPENCL_VERSION__)
+  return as_uint(f);
+#elif defined(__CUDA_ARCH__)
+  return (uint32_t) __float_as_uint(f);
+#elif defined(__INTEL_COMPILER)
+  return _castf32_u32(f);
+#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+  return (uint32_t) _CopyInt32FromFloat(f);
+#else
+  union {
+    float as_value;
+    uint32_t as_bits;
+  } fp32 = {f};
+  return fp32.as_bits;
+#endif
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
+ * IEEE half-precision format, in bit representation.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+uint16_t datautil::fp16_ieee_from_fp32_value(float f) {
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+  const float scale_to_inf = 0x1.0p+112f;
+  const float scale_to_zero = 0x1.0p-110f;
+#else
+  const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
+  const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
+#endif
+  float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
+
+  const uint32_t w = fp32_to_bits(f);
+  const uint32_t shl1_w = w + w;
+  const uint32_t sign = w & UINT32_C(0x80000000);
+  uint32_t bias = shl1_w & UINT32_C(0xFF000000);
+  if (bias < UINT32_C(0x71000000)) {
+    bias = UINT32_C(0x71000000);
+  }
+
+  base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
+  const uint32_t bits = fp32_to_bits(base);
+  const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
+  const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
+  const uint32_t nonsign = exp_bits + mantissa_bits;
+  // shl1_w exceeds 0xFF000000 only for NaN inputs; those map to 0x7E00.
+  return static_cast<uint16_t>((sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
+}
+
+/*
+ * Narrow numElements 32-bit floats read from `in` to the format selected by bitWidth, stored in `out`.
+ *
+ * bitWidth 16 writes IEEE half-precision values, 32 copies the floats unchanged. Returns false for a
+ * null buffer, for numElements == 0, for any other bitWidth, and for bitWidth 16 when __hexagon__ is
+ * defined.
+ */
+bool datautil::float32ToFloatN(uint8_t* out, float* in, size_t numElements, uint8_t bitWidth) {
+  if (out == nullptr || in == nullptr || numElements == 0) {
+    return false;
+  }
+
+  if (bitWidth == 16) {
+#ifndef __hexagon__
+    uint16_t* halfOut = reinterpret_cast<uint16_t*>(out);
+    for (size_t i = 0; i < numElements; i++) {
+      halfOut[i] = fp16_ieee_from_fp32_value(in[i]);
+    }
+#else
+    return false;
+#endif  //__hexagon__
+  } else if (bitWidth == 32) {
+    float* outFloat = reinterpret_cast<float*>(out);
+    for (size_t i = 0; i < numElements; i++) {
+      outFloat[i] = in[i];
+    }
+  } else {
+    return false;
+  }
+
+  return true;
+}
+
 template <typename T_QuantType>
 datautil::StatusCode datautil::floatToTfN(
     T_QuantType* out, float* in, int32_t offset, float scale, size_t numElements) {
diff --git a/src/Utils/DataUtil.hpp b/src/Utils/DataUtil.hpp
index 9867e74..c030a86 100644
--- a/src/Utils/DataUtil.hpp
+++ b/src/Utils/DataUtil.hpp
@@ -86,6 +86,26 @@ StatusCode writeBinaryToFile(std::string fileDir,
                              uint8_t* buffer,
                              size_t bufferSize);
 
+// IEEE half-precision <-> single-precision conversion of one value (half given as its bit pattern).
+uint16_t fp16_ieee_from_fp32_value(float f);
+float fp16_ieee_to_fp32_value(uint16_t h);
+
+// Bit casts between a float and its 32-bit pattern.
+uint32_t fp32_to_bits(float f);
+float fp32_from_bits(uint32_t w);
+
+// Widens bitWidth-bit (16 or 32) floating-point elements to float; returns false on failure.
+bool floatNToFloat32(float* out,
+                     uint8_t* in,
+                     size_t numElements,
+                     uint8_t bitWidth);
+
+// Narrows float elements to bitWidth-bit (16 or 32) floating-point; returns false on failure.
+bool float32ToFloatN(uint8_t* out,
+                     float* in,
+                     size_t numElements,
+                     uint8_t bitWidth);
+
 template <typename T_QuantType>
 datautil::StatusCode floatToTfN(
     T_QuantType* out, float* in, int32_t offset, float scale, size_t numElements);
diff --git a/src/Utils/IOTensor.cpp b/src/Utils/IOTensor.cpp
index 26c0885..ee9d6b2 100644
--- a/src/Utils/IOTensor.cpp
+++ b/src/Utils/IOTensor.cpp
@@ -71,6 +71,21 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffe
   fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor));
 
   switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
+    case QNN_DATATYPE_FLOAT_16:
+#ifdef __hexagon__
+      QNN_ERROR("failure in datautil::float32ToFloatN, not supported on Hexagon");
+      returnStatus = StatusCode::FAILURE;
+#else
+      if (!datautil::float32ToFloatN(static_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+                                     floatBuffer,
+                                     datautil::calculateElementCount(dims),
+                                     16)) {
+        QNN_ERROR("failure in datautil::float32ToFloatN");
+        returnStatus = StatusCode::FAILURE;
+      }
+#endif
+      break;
+
     case QNN_DATATYPE_UFIXED_POINT_8:
       datautil::floatToTfN<uint8_t>(static_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
                                     floatBuffer,
@@ -527,6 +542,7 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
   size_t elementCount = datautil::calculateElementCount(dims);
   auto returnStatus = StatusCode::SUCCESS;
   switch (dataType) {
+    case QNN_DATATYPE_FLOAT_16:
     case QNN_DATATYPE_FLOAT_32:
       QNN_DEBUG("allocating float buffer");
       returnStatus = allocateBuffer<float>(reinterpret_cast<float**>(buffer), elementCount);
@@ -614,6 +630,14 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_
     return returnStatus;
   }
   switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
+    case QNN_DATATYPE_FLOAT_16:
+      if (!datautil::floatNToFloat32(
+              *out, reinterpret_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), elementCount, 16)) {
+        QNN_ERROR("failure in datautil::floatNToFloat32");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
     case QNN_DATATYPE_UFIXED_POINT_8:
       if (datautil::StatusCode::SUCCESS !=
           datautil::tfNToFloat<uint8_t>(