Skip to content

Commit

Permalink
Add assembly versions of simple operations on aarch64
Browse files Browse the repository at this point in the history
For aarch64 and arm64ec with Neon, add assembly versions of the
following:

* `ceil`
* `ceilf`
* `fabs`
* `fabsf`
* `floor`
* `floorf`
* `fma`
* `fmaf`
* `round`
* `roundf`
* `sqrt`
* `sqrtf`
* `trunc`
* `truncf`

If the `fp16` target feature is available, which implies `neon`, also
include the following:

* `ceilf16`
* `fabsf16`
* `floorf16`
* `rintf16`
* `roundf16`
* `sqrtf16`
* `truncf16`

Additionally, replace `core::arch` versions of the following with
handwritten assembly (which avoids issues with `aarch64be`):

* `rint`
* `rintf`

Instructions for `fmax` and `fmin` are also available, but they seem to
produce different results depending on whether NaN inputs are signaling
or quiet. Our current implementation does not make this distinction, so
omit these for now.
  • Loading branch information
tgross35 committed Jan 24, 2025
1 parent b67b4cc commit 8703127
Show file tree
Hide file tree
Showing 26 changed files with 411 additions and 36 deletions.
21 changes: 21 additions & 0 deletions etc/function-definitions.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
"ceil": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/arch/i586.rs",
"src/math/arch/wasm32.rs",
"src/math/ceil.rs",
Expand All @@ -116,6 +117,7 @@
},
"ceilf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/arch/wasm32.rs",
"src/math/ceilf.rs",
"src/math/generic/ceil.rs"
Expand All @@ -131,6 +133,7 @@
},
"ceilf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/ceilf16.rs",
"src/math/generic/ceil.rs"
],
Expand Down Expand Up @@ -274,6 +277,7 @@
"fabs": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/arch/wasm32.rs",
"src/math/fabs.rs",
"src/math/generic/fabs.rs"
Expand All @@ -282,6 +286,7 @@
},
"fabsf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/arch/wasm32.rs",
"src/math/fabsf.rs",
"src/math/generic/fabs.rs"
Expand All @@ -297,6 +302,7 @@
},
"fabsf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/fabsf16.rs",
"src/math/generic/fabs.rs"
],
Expand Down Expand Up @@ -334,6 +340,7 @@
"floor": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/arch/i586.rs",
"src/math/arch/wasm32.rs",
"src/math/floor.rs",
Expand All @@ -343,6 +350,7 @@
},
"floorf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/arch/wasm32.rs",
"src/math/floorf.rs",
"src/math/generic/floor.rs"
Expand All @@ -358,6 +366,7 @@
},
"floorf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/floorf16.rs",
"src/math/generic/floor.rs"
],
Expand All @@ -366,12 +375,14 @@
"fma": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/fma.rs"
],
"type": "f64"
},
"fmaf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/fmaf.rs"
],
"type": "f32"
Expand Down Expand Up @@ -725,6 +736,7 @@
},
"rintf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/generic/rint.rs",
"src/math/rintf16.rs"
],
Expand All @@ -733,13 +745,15 @@
"round": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/generic/round.rs",
"src/math/round.rs"
],
"type": "f64"
},
"roundf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/generic/round.rs",
"src/math/roundf.rs"
],
Expand All @@ -754,6 +768,7 @@
},
"roundf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/generic/round.rs",
"src/math/roundf16.rs"
],
Expand Down Expand Up @@ -816,6 +831,7 @@
"sqrt": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/arch/i686.rs",
"src/math/arch/wasm32.rs",
"src/math/generic/sqrt.rs",
Expand All @@ -825,6 +841,7 @@
},
"sqrtf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/arch/i686.rs",
"src/math/arch/wasm32.rs",
"src/math/generic/sqrt.rs",
Expand All @@ -841,6 +858,7 @@
},
"sqrtf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/generic/sqrt.rs",
"src/math/sqrtf16.rs"
],
Expand Down Expand Up @@ -888,6 +906,7 @@
"trunc": {
"sources": [
"src/libm_helper.rs",
"src/math/arch/aarch64.rs",
"src/math/arch/wasm32.rs",
"src/math/generic/trunc.rs",
"src/math/trunc.rs"
Expand All @@ -896,6 +915,7 @@
},
"truncf": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/arch/wasm32.rs",
"src/math/generic/trunc.rs",
"src/math/truncf.rs"
Expand All @@ -911,6 +931,7 @@
},
"truncf16": {
"sources": [
"src/math/arch/aarch64.rs",
"src/math/generic/trunc.rs",
"src/math/truncf16.rs"
],
Expand Down
Loading

0 comments on commit 8703127

Please sign in to comment.