Skip to content

Commit

Permalink
Enabling fp16 execution
Browse files Browse the repository at this point in the history
  • Loading branch information
quic-shanagra committed Sep 18, 2024
1 parent 9a87fbd commit a728b23
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 0 deletions.
156 changes: 156 additions & 0 deletions src/Utils/DataUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,162 @@ datautil::StatusCode datautil::writeBinaryToFile(std::string fileDir,
return StatusCode::SUCCESS;
}


/*
 * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to a 32-bit
 * floating-point number in IEEE single-precision format.
 *
 * @param h Bit pattern of the half-precision value.
 * @return The value encoded by h, as a float.
 */
static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
  // Place the half in the upper 16 bits so that its sign bit coincides with the float sign bit.
  const uint32_t word = static_cast<uint32_t>(h) << 16;
  const uint32_t signBit = word & UINT32_C(0x80000000);
  // Adding the word to itself shifts the sign bit out; the half exponent now occupies bits 31..27.
  const uint32_t doubled = word + word;

  // Path for inputs with a non-zero exponent field: shifting right by 4 drops the half exponent and
  // mantissa into the float exponent/mantissa positions. The exponent is over-biased by 0xE0 and then
  // scaled back by 2^-112, a net adjustment of 112 = 127 - 15 (the difference between the two biases).
  const uint32_t expOffset = UINT32_C(0xE0) << 23;
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
  const float expScale = 0x1.0p-112f;
#else
  // Same constant spelled as a bit pattern for compilers without hexadecimal float literals.
  const float expScale = fp32_from_bits(UINT32_C(0x7800000));
#endif
  const float normalValue = fp32_from_bits((doubled >> 4) + expOffset) * expScale;

  // Path for inputs with a zero exponent field: OR the mantissa into the low bits of 0.5f and subtract
  // 0.5f again, which leaves mantissa * 2^-24.
  const uint32_t magicMask = UINT32_C(126) << 23;
  const float magicBias = 0.5f;
  const float denormValue = fp32_from_bits((doubled >> 17) | magicMask) - magicBias;

  // doubled < 2^27 exactly when all five half exponent bits are zero.
  const uint32_t denormCutoff = UINT32_C(1) << 27;
  uint32_t magnitudeBits;
  if (doubled < denormCutoff) {
    magnitudeBits = fp32_to_bits(denormValue);
  } else {
    magnitudeBits = fp32_to_bits(normalValue);
  }
  return fp32_from_bits(signBit | magnitudeBits);
}

/*
* NOTE: this comment documents fp16_ieee_from_fp32_value(), which is defined further down in this file,
* not the function that immediately follows it.
*
* Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
* IEEE half-precision format, in bit representation.
*
* @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
* floating-point operations and bitcasts between integer and floating-point variables.
*/


/*
 * Widen a buffer of floating-point values of the given bit width into 32-bit floats.
 *
 * @param out          Destination buffer; receives numElements floats.
 * @param in           Source bytes, read as numElements values of bitWidth bits each.
 * @param numElements  Number of values to convert.
 * @param bitWidth     Width in bits of each source value. 16 and 32 are handled.
 * @return true when the values were converted. false when out or in is null, numElements is 0,
 *         bitWidth is neither 16 nor 32, or bitWidth is 16 in a build where __hexagon__ is defined.
 */
bool datautil::floatNToFloat32(float* out,
                               uint8_t* in,
                               size_t numElements,
                               uint8_t bitWidth)
{
  // Fail cleanly on null buffers instead of dereferencing them in the loops below.
  if (out == nullptr || in == nullptr) {
    return false;
  }

  if (numElements == 0) {
    return false;
  }

  if (bitWidth == 16) {
#ifndef __hexagon__
    // The source bytes are read as native-endian 16-bit half-precision bit patterns.
    const uint16_t* halfIn = reinterpret_cast<const uint16_t*>(in);
    for (size_t i = 0; i < numElements; i++) {
      out[i] = fp16_ieee_to_fp32_value(halfIn[i]);
    }
    return true;
#else
    // Half-precision conversion is compiled out of __hexagon__ builds.
    return false;
#endif  //__hexagon__
  }

  if (bitWidth == 32) {
    // Already single precision: plain element-wise copy.
    const float* inFloat = reinterpret_cast<const float*>(in);
    for (size_t i = 0; i < numElements; i++) {
      out[i] = inFloat[i];
    }
    return true;
  }

  // Any other width is unsupported.
  return false;
}

/*
 * Reinterpret a 32-bit unsigned integer as a float with the same bit pattern (a bitcast, not a numeric
 * conversion). Uses a compiler/platform intrinsic when one of the recognised toolchains is detected.
 *
 * @param w Bit pattern to reinterpret.
 * @return The float whose representation is w.
 */
static inline float datautil::fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
  return as_float(w);
#elif defined(__CUDA_ARCH__)
  return __uint_as_float(static_cast<unsigned int>(w));
#elif defined(__INTEL_COMPILER)
  return _castu32_f32(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
  return _CopyFloatFromInt32(static_cast<__int32>(w));
#else
  // Generic fallback: store through the integer member of a union and read back the float member.
  // NOTE(review): reading the inactive union member is not sanctioned by ISO C++; this relies on the
  // compiler supporting union-based type punning.
  union {
    uint32_t as_bits;
    float as_value;
  } pun;
  pun.as_bits = w;
  return pun.as_value;
#endif
}

/*
 * Return the bit pattern of a float as a 32-bit unsigned integer (a bitcast, not a numeric conversion).
 * Uses a compiler/platform intrinsic when one of the recognised toolchains is detected.
 *
 * @param f Value whose representation is wanted.
 * @return The 32 bits that represent f.
 */
static inline uint32_t datautil::fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
  return as_uint(f);
#elif defined(__CUDA_ARCH__)
  return static_cast<uint32_t>(__float_as_uint(f));
#elif defined(__INTEL_COMPILER)
  return _castf32_u32(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
  return static_cast<uint32_t>(_CopyInt32FromFloat(f));
#else
  // Generic fallback: store through the float member of a union and read back the integer member.
  // NOTE(review): reading the inactive union member is not sanctioned by ISO C++; this relies on the
  // compiler supporting union-based type punning.
  union {
    float as_value;
    uint32_t as_bits;
  } pun;
  pun.as_value = f;
  return pun.as_bits;
#endif
}

/*
 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point
 * number in IEEE half-precision format, in bit representation.
 *
 * @param f Value to convert.
 * @return Bit pattern of the half-precision result. NaN inputs map to 0x7E00 combined with the input
 *         sign bit.
 */
static inline uint16_t datautil::fp16_ieee_from_fp32_value(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
  const float scaleToInf = 0x1.0p+112f;
  const float scaleToZero = 0x1.0p-110f;
#else
  // Same two constants spelled as bit patterns for compilers without hexadecimal float literals.
  const float scaleToInf = fp32_from_bits(UINT32_C(0x77800000));
  const float scaleToZero = fp32_from_bits(UINT32_C(0x08800000));
#endif
  // Scale |f| up by 2^112 and back down by 2^-110. The two multiplications are kept separate and in
  // this order on purpose: the intermediate result is part of the algorithm.
  const float scaledMagnitude = (fabsf(f) * scaleToInf) * scaleToZero;

  const uint32_t inBits = fp32_to_bits(f);
  // Adding the word to itself shifts the sign bit out, leaving the exponent in the top byte.
  const uint32_t inBitsNoSign = inBits + inBits;
  const uint32_t signBit = inBits & UINT32_C(0x80000000);

  // Take the exponent byte of the input, clamped from below at 0x71000000.
  const uint32_t rawBias = inBitsNoSign & UINT32_C(0xFF000000);
  const uint32_t bias = (rawBias < UINT32_C(0x71000000)) ? UINT32_C(0x71000000) : rawBias;

  // Adding the exponent-derived bias performs the rounding in floating-point arithmetic; the half
  // exponent and mantissa are then picked out of the resulting bit pattern.
  const float rounded = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + scaledMagnitude;
  const uint32_t roundedBits = fp32_to_bits(rounded);
  const uint32_t expBits = (roundedBits >> 13) & UINT32_C(0x00007C00);
  const uint32_t mantissaBits = roundedBits & UINT32_C(0x00000FFF);
  const uint32_t magnitude = expBits + mantissaBits;

  // Exponent all ones with a non-zero mantissa means the input is NaN.
  if (inBitsNoSign > UINT32_C(0xFF000000)) {
    return (signBit >> 16) | UINT16_C(0x7E00);
  }
  return (signBit >> 16) | magnitude;
}

/*
 * Narrow a buffer of 32-bit floats into floating-point values of the given bit width.
 *
 * @param out          Destination bytes; receives numElements values of bitWidth bits each.
 * @param in           Source buffer of numElements floats.
 * @param numElements  Number of values to convert.
 * @param bitWidth     Width in bits of each destination value. 16 and 32 are handled.
 * @return true when the values were converted. false when out or in is null, numElements is 0,
 *         bitWidth is neither 16 nor 32, or bitWidth is 16 in a build where __hexagon__ is defined.
 */
bool datautil::float32ToFloatN(uint8_t* out,
                               float* in,
                               size_t numElements,
                               uint8_t bitWidth)
{
  // Fail cleanly on null buffers instead of dereferencing them in the loops below.
  if (out == nullptr || in == nullptr) {
    return false;
  }

  if (numElements == 0) {
    return false;
  }

  if (bitWidth == 16) {
#ifndef __hexagon__
    // The destination bytes are written as native-endian 16-bit half-precision bit patterns.
    uint16_t* halfOut = reinterpret_cast<uint16_t*>(out);
    for (size_t i = 0; i < numElements; i++) {
      halfOut[i] = fp16_ieee_from_fp32_value(in[i]);
    }
    return true;
#else
    // Half-precision conversion is compiled out of __hexagon__ builds.
    return false;
#endif  //__hexagon__
  }

  if (bitWidth == 32) {
    // Already single precision: plain element-wise copy.
    float* outFloat = reinterpret_cast<float*>(out);
    for (size_t i = 0; i < numElements; i++) {
      outFloat[i] = in[i];
    }
    return true;
  }

  // Any other width is unsupported.
  return false;
}

template <typename T_QuantType>
datautil::StatusCode datautil::floatToTfN(
T_QuantType* out, float* in, int32_t offset, float scale, size_t numElements) {
Expand Down
16 changes: 16 additions & 0 deletions src/Utils/DataUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ StatusCode writeBinaryToFile(std::string fileDir,
uint8_t* buffer,
size_t bufferSize);

// Half-precision <-> single-precision scalar conversions (the half value is passed as its 16-bit pattern).
// NOTE(review): these four helpers are declared `static inline` here but defined in DataUtil.cpp, so any
// other translation unit including this header sees an internal-linkage declaration with no definition.
// Confirm whether they should instead be private to DataUtil.cpp or defined in this header.
static inline uint16_t fp16_ieee_from_fp32_value(float f);
static inline float fp16_ieee_to_fp32_value(uint16_t h);

// Bitcasts between a float and the 32-bit integer holding its representation.
static inline uint32_t fp32_to_bits(float f);
static inline float fp32_from_bits(uint32_t w);

// Converts numElements values of bitWidth bits each (16 or 32) from `in` into 32-bit floats in `out`.
// Returns false when numElements is 0 or bitWidth is unsupported.
bool floatNToFloat32(float* out,
uint8_t* in,
size_t numElements,
uint8_t bitWidth);

// Converts numElements 32-bit floats from `in` into values of bitWidth bits each (16 or 32) in `out`.
// Returns false when numElements is 0 or bitWidth is unsupported.
bool float32ToFloatN(uint8_t* out,
float* in,
size_t numElements,
uint8_t bitWidth);

template <typename T_QuantType>
datautil::StatusCode floatToTfN(
T_QuantType* out, float* in, int32_t offset, float scale, size_t numElements);
Expand Down
24 changes: 24 additions & 0 deletions src/Utils/IOTensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffe
fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor));

switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
case QNN_DATATYPE_FLOAT_16:
#ifdef __hexagon__
QNN_ERROR("failure in aiswutility::float32ToFloatN, not supported on Hexagon");
returnStatus = StatusCode::FAILURE;
#else
if (!datautil::float32ToFloatN(static_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
floatBuffer,
datautil::calculateElementCount(dims),
16)) {
QNN_ERROR("failure in aiswutility::float32ToFloatN");
returnStatus = StatusCode::FAILURE;
}
#endif
break;

case QNN_DATATYPE_UFIXED_POINT_8:
datautil::floatToTfN<uint8_t>(static_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
floatBuffer,
Expand Down Expand Up @@ -527,6 +542,7 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
size_t elementCount = datautil::calculateElementCount(dims);
auto returnStatus = StatusCode::SUCCESS;
switch (dataType) {
case QNN_DATATYPE_FLOAT_16:
case QNN_DATATYPE_FLOAT_32:
QNN_DEBUG("allocating float buffer");
returnStatus = allocateBuffer<float>(reinterpret_cast<float**>(buffer), elementCount);
Expand Down Expand Up @@ -614,6 +630,14 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_
return returnStatus;
}
switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
case QNN_DATATYPE_FLOAT_16:
if (!datautil::floatNToFloat32(
*out, reinterpret_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), elementCount, 16)) {
QNN_ERROR("failure in aiswutility::floatNToFloat32");
returnStatus = StatusCode::FAILURE;
}
break;

case QNN_DATATYPE_UFIXED_POINT_8:
if (datautil::StatusCode::SUCCESS !=
datautil::tfNToFloat<uint8_t>(
Expand Down

0 comments on commit a728b23

Please sign in to comment.