Add assembly version of simple operations on aarch64

For aarch64 and arm64ec with Neon, add assembly versions of the following:

* `ceil`
* `ceilf`
* `fabs`
* `fabsf`
* `floor`
* `floorf`
* `fma`
* `fmaf`
* `round`
* `roundf`
* `sqrt`
* `sqrtf`
* `trunc`
* `truncf`

If the `fp16` target feature is available, which implies `neon`, also include the following:

* `ceilf16`
* `fabsf16`
* `floorf16`
* `rintf16`
* `roundf16`
* `sqrtf16`
* `truncf16`

Additionally, replace `core::arch` versions of the following with handwritten assembly (which avoids issues with `aarch64be`):

* `rint`
* `rintf`

Instructions for `fmax` and `fmin` are also available but seem to provide different results based on whether NaN inputs are signaling or quiet. Our current implementation does not make that distinction, so omit these for now.
rust-lang · Jan 24, 2025 · 8703127 · 8703127
1 parent b67b4cc
commit 8703127
Show file tree

Hide file tree

Showing 26 changed files with 411 additions and 36 deletions.
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
@@ -107,6 +107,7 @@
     "ceil": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
             "src/math/ceil.rs",
@@ -116,6 +117,7 @@
     },
     "ceilf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/ceilf.rs",
             "src/math/generic/ceil.rs"
@@ -131,6 +133,7 @@
     },
     "ceilf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/ceilf16.rs",
             "src/math/generic/ceil.rs"
         ],
@@ -274,6 +277,7 @@
     "fabs": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/fabs.rs",
             "src/math/generic/fabs.rs"
@@ -282,6 +286,7 @@
     },
     "fabsf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/fabsf.rs",
             "src/math/generic/fabs.rs"
@@ -297,6 +302,7 @@
     },
     "fabsf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/fabsf16.rs",
             "src/math/generic/fabs.rs"
         ],
@@ -334,6 +340,7 @@
     "floor": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
             "src/math/floor.rs",
@@ -343,6 +350,7 @@
     },
     "floorf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/floorf.rs",
             "src/math/generic/floor.rs"
@@ -358,6 +366,7 @@
     },
     "floorf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/floorf16.rs",
             "src/math/generic/floor.rs"
         ],
@@ -366,12 +375,14 @@
     "fma": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/fmaf.rs"
         ],
         "type": "f32"
@@ -725,6 +736,7 @@
     },
     "rintf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/generic/rint.rs",
             "src/math/rintf16.rs"
         ],
@@ -733,13 +745,15 @@
     "round": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/generic/round.rs",
             "src/math/round.rs"
         ],
         "type": "f64"
     },
     "roundf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/generic/round.rs",
             "src/math/roundf.rs"
         ],
@@ -754,6 +768,7 @@
     },
     "roundf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/generic/round.rs",
             "src/math/roundf16.rs"
         ],
@@ -816,6 +831,7 @@
     "sqrt": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/sqrt.rs",
@@ -825,6 +841,7 @@
     },
     "sqrtf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/sqrt.rs",
@@ -841,6 +858,7 @@
     },
     "sqrtf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/generic/sqrt.rs",
             "src/math/sqrtf16.rs"
         ],
@@ -888,6 +906,7 @@
     "trunc": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/trunc.rs",
             "src/math/trunc.rs"
@@ -896,6 +915,7 @@
     },
     "truncf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/trunc.rs",
             "src/math/truncf.rs"
@@ -911,6 +931,7 @@
     },
     "truncf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/generic/trunc.rs",
             "src/math/truncf16.rs"
         ],