From 34a8c8f94468ed4b117f501ef7220f437274b6af Mon Sep 17 00:00:00 2001
From: Eric Buehler <65165915+EricLBuehler@users.noreply.github.com>
Date: Tue, 7 Jan 2025 06:11:54 -0500
Subject: [PATCH] chore: diffuse-rs -> diffusion-rs (#26)
* Rename diffuse-rs to diffusion-rs
* Format
* Don't run the code analysis
* Reset version
---
.github/workflows/analysis.yaml | 64 --------
.github/workflows/docs.yaml | 6 +-
.typos.toml | 8 +-
Cargo.lock | 36 ++---
Cargo.toml | 18 +--
FEATURE_FLAGS.md | 2 +-
INSTALL.md | 52 +++----
README.md | 20 +--
diffuse_rs_backend/README.md | 3 -
diffuse_rs_cli/README.md | 22 ---
diffuse_rs_common/README.md | 3 -
diffuse_rs_py/generate_wheels.sh | 64 --------
.../Cargo.toml | 8 +-
diffusion_rs_backend/README.md | 3 +
.../build.rs | 0
.../kernels/bitsandbytes/dequant.cu | 0
.../src/bitsandbytes/ffi.rs | 2 +-
.../src/bitsandbytes/mod.rs | 34 ++---
.../src/bitsandbytes/op.rs | 144 +++++++++---------
.../src/cublaslt/api.rs | 134 ++++++++--------
.../src/cublaslt/matmul.rs | 8 +-
.../src/cublaslt/mod.rs | 10 +-
.../src/gguf/mod.rs | 6 +-
.../src/lib.rs | 12 +-
.../src/metal_kernels/bnb_dequantize.metal | 0
.../src/metal_kernels/mod.rs | 2 +-
.../src/metal_kernels/sdpa.metal | 0
.../src/metal_kernels/utils.rs | 0
.../src/ops.rs | 44 +++---
.../src/unquantized/mod.rs | 4 +-
.../Cargo.toml | 16 +-
diffusion_rs_cli/README.md | 22 +++
.../src/main.rs | 4 +-
.../Cargo.toml | 2 +-
diffusion_rs_common/README.md | 3 +
.../build.rs | 0
.../src/core/LICENSE | 0
.../src/core/accelerate.rs | 0
.../src/core/backend.rs | 0
.../src/core/backprop.rs | 0
.../src/core/conv.rs | 0
.../src/core/convert.rs | 0
.../src/core/cpu/avx.rs | 0
.../src/core/cpu/erf.rs | 0
.../src/core/cpu/kernels.rs | 0
.../src/core/cpu/mod.rs | 0
.../src/core/cpu/neon.rs | 0
.../src/core/cpu/simd128.rs | 0
.../src/core/cpu_backend/mod.rs | 0
.../src/core/cpu_backend/utils.rs | 0
.../src/core/cuda_backend/cudnn.rs | 0
.../src/core/cuda_backend/device.rs | 0
.../src/core/cuda_backend/error.rs | 0
.../src/core/cuda_backend/mod.rs | 0
.../src/core/cuda_backend/utils.rs | 0
.../src/core/custom_op.rs | 0
.../src/core/device.rs | 0
.../src/core/display.rs | 0
.../src/core/dtype.rs | 0
.../src/core/dummy_cuda_backend.rs | 0
.../src/core/dummy_metal_backend.rs | 0
.../src/core/error.rs | 0
.../src/core/indexer.rs | 16 +-
.../src/core/layout.rs | 0
.../src/core/metal_backend/device.rs | 0
.../src/core/metal_backend/mod.rs | 0
.../src/core/mkl.rs | 0
.../src/core/mod.rs | 0
.../src/core/npy.rs | 0
.../src/core/op.rs | 0
.../src/core/pickle.rs | 0
.../src/core/quantized/avx.rs | 0
.../src/core/quantized/cuda.rs | 0
.../src/core/quantized/dummy_cuda.rs | 0
.../src/core/quantized/dummy_metal.rs | 0
.../src/core/quantized/ggml_file.rs | 0
.../src/core/quantized/gguf_file.rs | 0
.../src/core/quantized/imatrix_file.rs | 0
.../src/core/quantized/k_quants.rs | 0
.../src/core/quantized/metal.rs | 0
.../src/core/quantized/mod.rs | 0
.../src/core/quantized/neon.rs | 0
.../src/core/quantized/simd128.rs | 0
.../src/core/quantized/utils.rs | 0
.../src/core/safetensors.rs | 0
.../src/core/scalar.rs | 0
.../src/core/shape.rs | 0
.../src/core/sort.rs | 0
.../src/core/storage.rs | 0
.../src/core/streaming.rs | 0
.../src/core/strided_index.rs | 0
.../src/core/tensor.rs | 126 +++++++--------
.../src/core/tensor_cat.rs | 4 +-
.../src/core/tensor_indexing.rs | 4 +-
.../src/core/test_utils.rs | 0
.../src/core/tests/conv_tests.rs | 4 +-
.../src/core/tests/custom_op_tests.rs | 26 ++--
.../src/core/tests/display_tests.rs | 2 +-
.../src/core/tests/fortran_tensor_3d.pth | Bin
.../src/core/tests/grad_tests.rs | 2 +-
.../src/core/tests/indexing_tests.rs | 2 +-
.../src/core/tests/layout_tests.rs | 0
.../src/core/tests/matmul_tests.rs | 2 +-
.../src/core/tests/npy.py | 0
.../src/core/tests/pool_tests.rs | 2 +-
.../src/core/tests/pth.py | 0
.../src/core/tests/pth_tests.rs | 6 +-
.../src/core/tests/quantized_tests.rs | 2 +-
.../src/core/tests/serialization_tests.rs | 12 +-
.../src/core/tests/tensor_tests.rs | 4 +-
.../src/core/tests/test.npy | Bin
.../src/core/tests/test.npz | Bin
.../src/core/tests/test.pt | Bin
.../src/core/tests/test_with_key.pt | Bin
.../src/core/utils.rs | 0
.../src/core/variable.rs | 0
.../src/cuda_kernels/LICENSE | 0
.../src/cuda_kernels/affine.cu | 0
.../src/cuda_kernels/binary.cu | 0
.../src/cuda_kernels/binary_op_macros.cuh | 0
.../src/cuda_kernels/cast.cu | 0
.../src/cuda_kernels/compatibility.cuh | 0
.../src/cuda_kernels/conv.cu | 0
.../src/cuda_kernels/cuda_utils.cuh | 0
.../src/cuda_kernels/fill.cu | 0
.../src/cuda_kernels/fused_rms_norm.cu | 0
.../src/cuda_kernels/fused_rope.cu | 0
.../src/cuda_kernels/indexing.cu | 0
.../src/cuda_kernels/kvconcat.cu | 0
.../src/cuda_kernels/mod.rs | 0
.../src/cuda_kernels/quantized.cu | 0
.../src/cuda_kernels/reduce.cu | 0
.../src/cuda_kernels/sort.cu | 0
.../src/cuda_kernels/ternary.cu | 0
.../src/cuda_kernels/unary.cu | 0
.../src/lib.rs | 0
.../src/metal_kernels/LICENSE | 0
.../src/metal_kernels/affine.metal | 0
.../src/metal_kernels/binary.metal | 0
.../src/metal_kernels/cast.metal | 0
.../src/metal_kernels/conv.metal | 0
.../src/metal_kernels/fill.metal | 0
.../src/metal_kernels/indexing.metal | 0
.../libMetalFlashAttention.metallib | Bin
.../src/metal_kernels/mlx_gemm.metal | 0
.../src/metal_kernels/mod.rs | 0
.../src/metal_kernels/quantized.metal | 0
.../src/metal_kernels/random.metal | 0
.../src/metal_kernels/reduce.metal | 0
.../scaled_dot_product_attention.metal | 0
.../src/metal_kernels/sort.metal | 0
.../src/metal_kernels/ternary.metal | 0
.../src/metal_kernels/tests.rs | 0
.../src/metal_kernels/unary.metal | 0
.../src/metal_kernels/utils.rs | 0
.../src/model_source.rs | 2 +-
.../src/nn/LICENSE | 0
.../src/nn/activation.rs | 0
.../src/nn/attention.rs | 0
.../src/nn/batch_norm.rs | 0
.../src/nn/conv.rs | 0
.../src/nn/embedding.rs | 0
.../src/nn/encoding.rs | 12 +-
.../src/nn/func.rs | 0
.../src/nn/group_norm.rs | 0
.../src/nn/init.rs | 0
.../src/nn/kv_cache.rs | 0
.../src/nn/layer_norm.rs | 6 +-
.../src/nn/linear.rs | 6 +-
.../src/nn/loss.rs | 0
.../src/nn/mod.rs | 0
.../src/nn/ops.rs | 8 +-
.../src/nn/optim.rs | 0
.../src/nn/rnn.rs | 0
.../src/nn/rope.rs | 0
.../src/nn/rotary_emb.rs | 0
.../src/nn/sequential.rs | 0
.../src/nn/tests/batch_norm.rs | 2 +-
.../src/nn/tests/group_norm.rs | 2 +-
.../src/nn/tests/kv_cache.rs | 4 +-
.../src/nn/tests/layer_norm.rs | 2 +-
.../src/nn/tests/loss.rs | 8 +-
.../src/nn/tests/one_hot.rs | 2 +-
.../src/nn/tests/ops.rs | 42 ++---
.../src/nn/tests/optim.rs | 6 +-
.../src/nn/tests/rnn.rs | 10 +-
.../src/nn/tests/sdpa.rs | 44 +++---
.../src/nn/var_builder.rs | 6 +-
.../src/nn/var_map.rs | 0
.../src/nn_wrap.rs | 0
.../src/progress.rs | 0
.../src/safetensors.rs | 0
.../src/tokenizer.rs | 0
.../src/tokens.rs | 0
.../src/varbuilder.rs | 0
.../src/varbuilder_loading.rs | 0
.../Cargo.toml | 18 +--
.../src/lib.rs | 6 +-
.../src/models/clip/mod.rs | 0
.../src/models/clip/text.rs | 67 ++++----
.../src/models/flux/mod.rs | 0
.../src/models/flux/model.rs | 89 ++++++-----
.../src/models/mod.rs | 4 +-
.../src/models/t5/mod.rs | 16 +-
.../src/models/vaes/autoencoder_kl.rs | 10 +-
.../src/models/vaes/mod.rs | 4 +-
.../src/models/vaes/vae.rs | 46 +++---
.../src/pipelines/flux/mod.rs | 18 +--
.../src/pipelines/flux/sampling.rs | 2 +-
.../src/pipelines/mod.rs | 8 +-
.../src/pipelines/sampling.rs | 2 +-
.../src/pipelines/scheduler.rs | 2 +-
.../Cargo.toml | 6 +-
.../examples/dduf/README.md | 0
.../examples/dduf/main.rs | 4 +-
.../examples/flux/README.md | 0
.../examples/flux/main.rs | 4 +-
{diffuse_rs_py => diffusion_rs_py}/Cargo.toml | 16 +-
{diffuse_rs_py => diffusion_rs_py}/build.rs | 0
.../diffuse_rs.pyi | 0
.../examples/dduf.py | 2 +-
.../examples/flux.py | 2 +-
diffusion_rs_py/generate_wheels.sh | 64 ++++++++
.../pyproject.toml | 4 +-
{diffuse_rs_py => diffusion_rs_py}/src/lib.rs | 18 +--
225 files changed, 749 insertions(+), 795 deletions(-)
delete mode 100644 .github/workflows/analysis.yaml
delete mode 100644 diffuse_rs_backend/README.md
delete mode 100644 diffuse_rs_cli/README.md
delete mode 100644 diffuse_rs_common/README.md
delete mode 100644 diffuse_rs_py/generate_wheels.sh
rename {diffuse_rs_backend => diffusion_rs_backend}/Cargo.toml (77%)
create mode 100644 diffusion_rs_backend/README.md
rename {diffuse_rs_backend => diffusion_rs_backend}/build.rs (100%)
rename {diffuse_rs_backend => diffusion_rs_backend}/kernels/bitsandbytes/dequant.cu (100%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/bitsandbytes/ffi.rs (97%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/bitsandbytes/mod.rs (90%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/bitsandbytes/op.rs (82%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/cublaslt/api.rs (70%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/cublaslt/matmul.rs (98%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/cublaslt/mod.rs (91%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/gguf/mod.rs (93%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/lib.rs (94%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/bnb_dequantize.metal (100%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/mod.rs (99%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/sdpa.metal (100%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/utils.rs (100%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/ops.rs (84%)
rename {diffuse_rs_backend => diffusion_rs_backend}/src/unquantized/mod.rs (94%)
rename {diffuse_rs_cli => diffusion_rs_cli}/Cargo.toml (57%)
create mode 100644 diffusion_rs_cli/README.md
rename {diffuse_rs_cli => diffusion_rs_cli}/src/main.rs (97%)
rename {diffuse_rs_common => diffusion_rs_common}/Cargo.toml (98%)
create mode 100644 diffusion_rs_common/README.md
rename {diffuse_rs_common => diffusion_rs_common}/build.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/LICENSE (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/accelerate.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/backend.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/backprop.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/conv.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/convert.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/avx.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/erf.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/kernels.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/neon.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/simd128.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu_backend/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu_backend/utils.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/cudnn.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/device.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/error.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/utils.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/custom_op.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/device.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/display.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/dtype.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/dummy_cuda_backend.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/dummy_metal_backend.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/error.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/indexer.rs (93%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/layout.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/metal_backend/device.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/metal_backend/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/mkl.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/npy.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/op.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/pickle.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/avx.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/cuda.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/dummy_cuda.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/dummy_metal.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/ggml_file.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/gguf_file.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/imatrix_file.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/k_quants.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/metal.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/neon.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/simd128.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/utils.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/safetensors.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/scalar.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/shape.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/sort.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/storage.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/streaming.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/strided_index.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tensor.rs (96%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tensor_cat.rs (98%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tensor_indexing.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/test_utils.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/conv_tests.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/custom_op_tests.rs (83%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/display_tests.rs (97%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/fortran_tensor_3d.pth (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/grad_tests.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/indexing_tests.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/layout_tests.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/matmul_tests.rs (98%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/npy.py (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/pool_tests.rs (97%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/pth.py (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/pth_tests.rs (66%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/quantized_tests.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/serialization_tests.rs (76%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/tensor_tests.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test.npy (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test.npz (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test.pt (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test_with_key.pt (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/utils.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/core/variable.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/LICENSE (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/affine.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/binary.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/binary_op_macros.cuh (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/cast.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/compatibility.cuh (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/conv.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/cuda_utils.cuh (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/fill.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/fused_rms_norm.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/fused_rope.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/indexing.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/kvconcat.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/quantized.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/reduce.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/sort.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/ternary.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/unary.cu (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/lib.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/LICENSE (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/affine.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/binary.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/cast.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/conv.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/fill.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/indexing.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/libMetalFlashAttention.metallib (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/mlx_gemm.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/quantized.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/random.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/reduce.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/scaled_dot_product_attention.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/sort.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/ternary.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/tests.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/unary.metal (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/utils.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/model_source.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/LICENSE (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/activation.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/attention.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/batch_norm.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/conv.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/embedding.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/encoding.rs (93%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/func.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/group_norm.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/init.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/kv_cache.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/layer_norm.rs (98%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/linear.rs (94%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/loss.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/mod.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/ops.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/optim.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/rnn.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/rope.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/rotary_emb.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/sequential.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/batch_norm.rs (98%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/group_norm.rs (98%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/kv_cache.rs (96%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/layer_norm.rs (97%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/loss.rs (87%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/one_hot.rs (98%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/ops.rs (84%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/optim.rs (96%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/rnn.rs (91%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/sdpa.rs (83%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/var_builder.rs (99%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn/var_map.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/nn_wrap.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/progress.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/safetensors.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/tokenizer.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/tokens.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/varbuilder.rs (100%)
rename {diffuse_rs_common => diffusion_rs_common}/src/varbuilder_loading.rs (100%)
rename {diffuse_rs_core => diffusion_rs_core}/Cargo.toml (59%)
rename {diffuse_rs_core => diffusion_rs_core}/src/lib.rs (81%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/clip/mod.rs (100%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/clip/text.rs (78%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/flux/mod.rs (100%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/flux/model.rs (90%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/mod.rs (93%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/t5/mod.rs (97%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/vaes/autoencoder_kl.rs (93%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/vaes/mod.rs (94%)
rename {diffuse_rs_core => diffusion_rs_core}/src/models/vaes/vae.rs (88%)
rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/flux/mod.rs (93%)
rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/flux/sampling.rs (97%)
rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/mod.rs (97%)
rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/sampling.rs (98%)
rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/scheduler.rs (96%)
rename {diffuse_rs_examples => diffusion_rs_examples}/Cargo.toml (74%)
rename {diffuse_rs_examples => diffusion_rs_examples}/examples/dduf/README.md (100%)
rename {diffuse_rs_examples => diffusion_rs_examples}/examples/dduf/main.rs (92%)
rename {diffuse_rs_examples => diffusion_rs_examples}/examples/flux/README.md (100%)
rename {diffuse_rs_examples => diffusion_rs_examples}/examples/flux/main.rs (94%)
rename {diffuse_rs_py => diffusion_rs_py}/Cargo.toml (61%)
rename {diffuse_rs_py => diffusion_rs_py}/build.rs (100%)
rename {diffuse_rs_py => diffusion_rs_py}/diffuse_rs.pyi (100%)
rename {diffuse_rs_py => diffusion_rs_py}/examples/dduf.py (82%)
rename {diffuse_rs_py => diffusion_rs_py}/examples/flux.py (83%)
create mode 100644 diffusion_rs_py/generate_wheels.sh
rename {diffuse_rs_py => diffusion_rs_py}/pyproject.toml (92%)
rename {diffuse_rs_py => diffusion_rs_py}/src/lib.rs (83%)
diff --git a/.github/workflows/analysis.yaml b/.github/workflows/analysis.yaml
deleted file mode 100644
index 52a5fe0..0000000
--- a/.github/workflows/analysis.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: Analysis
-on:
- pull_request_target
-
-jobs:
- comment:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Install Rust and Cargo
- run: |
- curl -sSf https://sh.rustup.rs | sh -s -- -y
- source $HOME/.cargo/env
-
- - name: Install Tokei
- run: cargo install tokei
-
- - name: Run Tokei and get the lines of code
- run: tokei . > tokei_output.txt
-
- - name: Comment or Update PR
- uses: actions/github-script@v7
- with:
- script: |
- const fs = require('fs');
- const tokeiOutput = fs.readFileSync('tokei_output.txt', 'utf8');
- const uniqueIdentifier = 'Code Metrics Report';
- const codeReport = `
-
- ${uniqueIdentifier}
-
- ${tokeiOutput}
-
-
- `;
-
- const issue_number = context.issue.number;
- const { owner, repo } = context.repo;
-
- const comments = await github.rest.issues.listComments({
- issue_number,
- owner,
- repo
- });
-
- const existingComment = comments.data.find(comment => comment.body.includes(uniqueIdentifier));
-
- if (existingComment) {
- await github.rest.issues.updateComment({
- owner,
- repo,
- comment_id: existingComment.id,
- body: codeReport
- });
- } else {
- await github.rest.issues.createComment({
- issue_number,
- owner,
- repo,
- body: codeReport
- });
- }
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index f8c776d..71b0971 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -38,17 +38,17 @@ jobs:
- name: Build docs
run: |
rm -rf ./docs
- echo "" > target/doc/index.html
+ echo "" > target/doc/index.html
cp -r target/doc ./docs
- name: Build Python docs
run: |
python3 -m venv myenv
source myenv/bin/activate
pip install maturin[patchelf] pdoc
- cd diffuse_rs_py
+ cd diffusion_rs_py
maturin develop
cd ..
- pdoc diffuse_rs -o ./docs/pyo3
+ pdoc diffusion_rs -o ./docs/pyo3
- name: Deploy
uses: JamesIves/github-pages-deploy-action@v4
with:
diff --git a/.typos.toml b/.typos.toml
index d5c733c..c561ee3 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -9,8 +9,8 @@ extend-ignore-identifiers-re = [
[files]
extend-exclude = [
- "diffuse_rs_common/src/core/*",
- "diffuse_rs_common/src/nn/*",
- "diffuse_rs_common/src/cuda_kernels/*",
- "diffuse_rs_common/src/metal_kernels/*"
+ "diffusion_rs_common/src/core/*",
+ "diffusion_rs_common/src/nn/*",
+ "diffusion_rs_common/src/cuda_kernels/*",
+ "diffusion_rs_common/src/metal_kernels/*"
]
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 9964528..cc3037f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -663,12 +663,12 @@ dependencies = [
]
[[package]]
-name = "diffuse_rs_backend"
-version = "0.1.3"
+name = "diffusion_rs_backend"
+version = "0.1.0"
dependencies = [
"bindgen_cuda",
"byteorder",
- "diffuse_rs_common",
+ "diffusion_rs_common",
"float8",
"half",
"lazy_static",
@@ -682,20 +682,20 @@ dependencies = [
]
[[package]]
-name = "diffuse_rs_cli"
-version = "0.1.3"
+name = "diffusion_rs_cli"
+version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"cliclack",
- "diffuse_rs_core",
+ "diffusion_rs_core",
"tracing",
"tracing-subscriber",
]
[[package]]
-name = "diffuse_rs_common"
-version = "0.1.3"
+name = "diffusion_rs_common"
+version = "0.1.0"
dependencies = [
"accelerate-src",
"anyhow",
@@ -731,13 +731,13 @@ dependencies = [
]
[[package]]
-name = "diffuse_rs_core"
-version = "0.1.3"
+name = "diffusion_rs_core"
+version = "0.1.0"
dependencies = [
"anyhow",
"clap",
- "diffuse_rs_backend",
- "diffuse_rs_common",
+ "diffusion_rs_backend",
+ "diffusion_rs_common",
"float8",
"half",
"hf-hub",
@@ -754,22 +754,22 @@ dependencies = [
]
[[package]]
-name = "diffuse_rs_examples"
-version = "0.1.3"
+name = "diffusion_rs_examples"
+version = "0.1.0"
dependencies = [
"anyhow",
"clap",
- "diffuse_rs_core",
+ "diffusion_rs_core",
"tracing",
"tracing-subscriber",
]
[[package]]
-name = "diffuse_rs_py"
-version = "0.1.3"
+name = "diffusion_rs_py"
+version = "0.1.0"
dependencies = [
"anyhow",
- "diffuse_rs_core",
+ "diffusion_rs_core",
"image",
"pyo3",
"pyo3-build-config",
diff --git a/Cargo.toml b/Cargo.toml
index b583c2d..068c60f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,21 +1,21 @@
[workspace]
members = [
- "diffuse_rs_core",
- "diffuse_rs_examples",
- "diffuse_rs_backend",
- "diffuse_rs_common",
- "diffuse_rs_cli",
- "diffuse_rs_py",
+ "diffusion_rs_core",
+ "diffusion_rs_examples",
+ "diffusion_rs_backend",
+ "diffusion_rs_common",
+ "diffusion_rs_cli",
+ "diffusion_rs_py",
]
resolver = "2"
[workspace.package]
-version = "0.1.3"
+version = "0.1.0"
edition = "2021"
authors = ["Eric Buehler"]
description = "Blazingly fast inference of diffusion models."
-homepage = "https://github.com/EricLBuehler/diffuse-rs"
-repository = "https://github.com/EricLBuehler/diffuse-rs"
+homepage = "https://github.com/EricLBuehler/diffusion-rs"
+repository = "https://github.com/EricLBuehler/diffusion-rs"
keywords = ["machine-learning"]
categories = ["science"]
license = "MIT"
diff --git a/FEATURE_FLAGS.md b/FEATURE_FLAGS.md
index 4dc36db..d507557 100644
--- a/FEATURE_FLAGS.md
+++ b/FEATURE_FLAGS.md
@@ -1,6 +1,6 @@
# Feature flags
-Diffuse-rs controls building with GPU support or CPU SIMD acceleration with feature flags.
+diffusion-rs controls building with GPU support or CPU SIMD acceleration with feature flags.
These are set at compile time and are as follows:
diff --git a/INSTALL.md b/INSTALL.md
index e791d6a..8b161cf 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,4 +1,4 @@
-# Installation guide for diffuse-rs
+# Installation guide for diffusion-rs
**ToC**
- [CLI](#cli)
@@ -8,7 +8,7 @@
- [Rust crate](#rust-crate)
## CLI
-1) Installing diffuse-rs via the CLI requires a few prerequisites:
+1) Installing diffusion-rs via the CLI requires a few prerequisites:
- Install the Rust programming language
- Follow the instructions on this site: https://rustup.rs/
- (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`)
@@ -18,13 +18,13 @@
- Install the necessary tool: `pip install huggingface_hub`
- Login: `huggingface-cli login`
-3) Install the `diffuse_rs_cli` CLI
+3) Install the `diffusion_rs_cli` CLI
> [!NOTE]
> Replace the `...` below with [feature flags](FEATURE_FLAGS.md) to build for Nvidia GPUs (CUDA) or Apple Silicon GPUs (Metal)
```
-cargo install diffuse_rs_cli --features ...
+cargo install diffusion_rs_cli --features ...
```
4) Try the CLI!
@@ -32,11 +32,11 @@ cargo install diffuse_rs_cli --features ...
> Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf`
```
-diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
+diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
```
## CLI from source
-1) Installing diffuse-rs via the CLI requires a few prerequisites:
+1) Installing diffusion-rs via the CLI requires a few prerequisites:
- Install the Rust programming language
- Follow the instructions on this site: https://rustup.rs/
- (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`)
@@ -48,17 +48,17 @@ diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
3) Clone the repository
```
-git clone https://github.com/EricLBuehler/diffuse-rs.git
-cd diffuse-rs
+git clone https://github.com/EricLBuehler/diffusion-rs.git
+cd diffusion-rs
```
-4) Install the `diffuse_rs_cli` CLI
+4) Install the `diffusion_rs_cli` CLI
> [!NOTE]
> Replace the `...` below with [feature flags](FEATURE_FLAGS.md) to build for Nvidia GPUs (CUDA) or Apple Silicon GPUs (Metal)
```
-cargo install --path diffuse_rs_cli --release --features ...
+cargo install --path diffusion_rs_cli --release --features ...
```
5) Try the CLI!
@@ -66,11 +66,11 @@ cargo install --path diffuse_rs_cli --release --features ...
> Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf`
```
-diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
+diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
```
## Python bindings
-1) Installing diffuse-rs via the Python bindings requires a few prerequisites:
+1) Installing diffusion-rs via the Python bindings requires a few prerequisites:
- Install the Rust programming language
- Follow the instructions on this site: https://rustup.rs/
- (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`)
@@ -84,18 +84,18 @@ diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
|Feature|Flag|
|--|--|
-|Nvidia GPUs (CUDA)|`pip install diffuse_rs_cuda`|
-|Apple Silicon GPUs (Metal)|`pip install diffuse_rs_metal`|
-|Apple Accelerate (CPU)|`pip install diffuse_rs_accelerate`|
-|Intel MKL (CPU)|`pip install diffuse_rs_mkl`|
-|Use AVX or NEON automatically|`pip install diffuse_rs`|
+|Nvidia GPUs (CUDA)|`pip install diffusion_rs_cuda`|
+|Apple Silicon GPUs (Metal)|`pip install diffusion_rs_metal`|
+|Apple Accelerate (CPU)|`pip install diffusion_rs_accelerate`|
+|Intel MKL (CPU)|`pip install diffusion_rs_mkl`|
+|Use AVX or NEON automatically|`pip install diffusion_rs`|
4) Try the Python bindings!
> Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf`
```py
-from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline
+from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline
from PIL import Image
import io
@@ -113,7 +113,7 @@ image.show()
```
## Python bindings from source
-1) Installing diffuse-rs via the Python bindings requires a few prerequisites:
+1) Installing diffusion-rs via the Python bindings requires a few prerequisites:
- Install the Rust programming language
- Follow the instructions on this site: https://rustup.rs/
- (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`)
@@ -125,8 +125,8 @@ image.show()
3) Clone the repository
```
-git clone https://github.com/EricLBuehler/diffuse-rs.git
-cd diffuse-rs
+git clone https://github.com/EricLBuehler/diffusion-rs.git
+cd diffusion-rs
```
4) Install the maturin build tool
@@ -140,7 +140,7 @@ pip install maturin
> Replace the `...` below with [feature flags](FEATURE_FLAGS.md) to build for Nvidia GPUs (CUDA) or Apple Silicon GPUs (Metal)
```
-maturin develop -m diffuse_rs_py/Cargo.toml --release --features ...
+maturin develop -m diffusion_rs_py/Cargo.toml --release --features ...
```
6) Try the Python bindings!
@@ -148,7 +148,7 @@ maturin develop -m diffuse_rs_py/Cargo.toml --release --features ...
> Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf`
```py
-from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline
+from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline
from PIL import Image
import io
@@ -166,7 +166,7 @@ image.show()
```
## Rust crate
-1) Installing diffuse-rs for usage as a Rust crate requires a few prerequisites:
+1) Installing diffusion-rs for usage as a Rust crate requires a few prerequisites:
- Install the Rust programming language
- Follow the instructions on this site: https://rustup.rs/
- (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`)
@@ -177,5 +177,5 @@ image.show()
- Login: `huggingface-cli login`
3) Add the dependency to your `Cargo.toml`
- - Run: `cargo add diffuse_rs_core`
- - Alternatively, you can add the git dependency to your Cargo.toml for the latest updates: `diffuse_rs_core = { git = "https://github.com/EricLBuehler/diffuse-rs.git", version = "0.1.0" }`
+ - Run: `cargo add diffusion_rs_core`
+ - Alternatively, you can add the git dependency to your Cargo.toml for the latest updates: `diffusion_rs_core = { git = "https://github.com/EricLBuehler/diffusion-rs.git", version = "0.1.0" }`
diff --git a/README.md b/README.md
index c4c2f18..835a259 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
- diffuse-rs
+ diffusion-rs
@@ -8,7 +8,7 @@ Blazingly fast inference of diffusion models.
-| Rust Documentation | Python Documentation | Discord |
+| Rust Documentation | Python Documentation | Discord |
@@ -22,7 +22,7 @@ Blazingly fast inference of diffusion models.
- AVX support for x86 CPUs
- Allow acceleration of models larger than the total VRAM size with offloading
-Please do not hesitate to contact us with feature requests via [Github issues](https://github.com/EricLBuehler/diffuse-rs/issues)!
+Please do not hesitate to contact us with feature requests via [Github issues](https://github.com/EricLBuehler/diffusion-rs/issues)!
## Upcoming features
- 🚧 LoRA support
@@ -38,17 +38,17 @@ After [installing](#installation), you can try out these examples!
**CLI:**
```bash
-diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
+diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
```
-More CLI examples [here](diffuse_rs_cli/README.md).
+More CLI examples [here](diffusion_rs_cli/README.md).
**Python:**
-More Python examples [here](diffuse_rs_py/examples).
+More Python examples [here](diffusion_rs_py/examples).
```py
-from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline
+from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline
from PIL import Image
import io
@@ -67,12 +67,12 @@ image.show()
**Rust crate:**
-Examples with the Rust crate: [here](diffuse_rs_examples/examples).
+Examples with the Rust crate: [here](diffusion_rs_examples/examples).
```rust
use std::time::Instant;
-use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
+use diffusion_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::EnvFilter;
@@ -115,5 +115,5 @@ images[0].save("image.png")?;
## Contributing
- Anyone is welcome to contribute by opening PRs
- - See [good first issues](https://github.com/EricLBuehler/diffuse-rs/labels/good%20first%20issue) for a starting point!
+ - See [good first issues](https://github.com/EricLBuehler/diffusion-rs/labels/good%20first%20issue) for a starting point!
- Collaborators will be invited based on past contributions
diff --git a/diffuse_rs_backend/README.md b/diffuse_rs_backend/README.md
deleted file mode 100644
index 97083ae..0000000
--- a/diffuse_rs_backend/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `diffuse_rs_backend`
-
-Backend for quantization in diffuse-rs.
\ No newline at end of file
diff --git a/diffuse_rs_cli/README.md b/diffuse_rs_cli/README.md
deleted file mode 100644
index 2f5a1c8..0000000
--- a/diffuse_rs_cli/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# `diffuse_rs_cli`
-
-CLI for diffuse-rs.
-
-## Examples
-- FLUX dev:
-```
-diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
-```
-
-```
-diffuse_rs_cli --scale 3.5 --num-steps 50 model-id -m black-forest-labs/FLUX.1-dev
-```
-
-- FLUX schnell:
-```
-diffuse_rs_cli --scale 0.0 --num-steps 4 dduf -f FLUX.1-schnell-Q8-bnb.dduf
-```
-
-```
-diffuse_rs_cli --scale 0.0 --num-steps 4 model-id -m black-forest-labs/FLUX.1-dev
-```
\ No newline at end of file
diff --git a/diffuse_rs_common/README.md b/diffuse_rs_common/README.md
deleted file mode 100644
index 2cb940d..0000000
--- a/diffuse_rs_common/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# `diffuse_rs_common`
-
-Common functionality for diffuse-rs, including core ML framework based on Candle, NN functionality, and DDUF loading.
\ No newline at end of file
diff --git a/diffuse_rs_py/generate_wheels.sh b/diffuse_rs_py/generate_wheels.sh
deleted file mode 100644
index 4ee229a..0000000
--- a/diffuse_rs_py/generate_wheels.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-###################################
-### UPLOADING
-###################################
-
-# ⚠️⚠️⚠️⚠️ Be sure to update the `project.name` field in `pyproject.toml`!! ⚠️⚠️⚠️⚠️
-# diffuse_rs, diffuse_rs_cuda, diffuse_rs_metal, diffuse_rs_mkl, diffuse_rs_accelerate
-
-## testpypi:
-# twine upload --repository pypi --password PASSWORD --username __token__ wheels-NAME/*.whl
-
-
-## pypi:
-# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cuda/*.whl
-# twine upload --repository pypi --password PASSWORD --username __token__ wheels-mkl/*.whl
-# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cuda/*.whl
-# twine upload --repository pypi --password PASSWORD --username __token__ wheels-metal/*.whl
-# ⚠️ Need both x86_64 and aarch64 builds before this! ⚠️
-# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cpu/*.whl
-
-
-###################################
-#### MAC: Aarch64 Manylinux and OSX
-###################################
-
-docker build -t wheelmaker:latest -f Dockerfile.manylinux .
-docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.10
-docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.11
-docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.12
-
-maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.10
-maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.11
-maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.12
-
-# Metal
-
-maturin build -o wheels-metal -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features metal
-maturin build -o wheels-metal -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features metal
-maturin build -o wheels-metal -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features metal
-
-# Accelerate
-
-maturin build -o wheels-accelerate -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features accelerate
-maturin build -o wheels-accelerate -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features accelerate
-maturin build -o wheels-accelerate -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features accelerate
-
-####################################
-# WINDOWS: x86_64 Manylinux, Windows
-####################################
-
-maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.10
-maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.11
-maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.12
-
-# CUDA
-
-maturin build -o wheels-cuda -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features cuda
-maturin build -o wheels-cuda -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features cuda
-maturin build -o wheels-cuda -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features cuda
-
-# MKL
-
-maturin build -o wheels-mkl -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features mkl
-maturin build -o wheels-mkl -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features mkl
-maturin build -o wheels-mkl -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features mkl
diff --git a/diffuse_rs_backend/Cargo.toml b/diffusion_rs_backend/Cargo.toml
similarity index 77%
rename from diffuse_rs_backend/Cargo.toml
rename to diffusion_rs_backend/Cargo.toml
index 9a125ad..23f0f1e 100644
--- a/diffuse_rs_backend/Cargo.toml
+++ b/diffusion_rs_backend/Cargo.toml
@@ -1,5 +1,5 @@
[package]
-name = "diffuse_rs_backend"
+name = "diffusion_rs_backend"
readme = "README.md"
authors.workspace = true
version.workspace = true
@@ -23,11 +23,11 @@ thiserror.workspace = true
lazy_static.workspace = true
paste.workspace = true
byteorder.workspace = true
-diffuse_rs_common = { path = "../diffuse_rs_common" }
+diffusion_rs_common = { path = "../diffusion_rs_common" }
[features]
-cuda = ["dep:bindgen_cuda", "diffuse_rs_common/cuda"]
-metal = ["dep:metal", "diffuse_rs_common/metal"]
+cuda = ["dep:bindgen_cuda", "diffusion_rs_common/cuda"]
+metal = ["dep:metal", "diffusion_rs_common/metal"]
[build-dependencies]
bindgen_cuda = { version = "0.1.5", optional = true }
diff --git a/diffusion_rs_backend/README.md b/diffusion_rs_backend/README.md
new file mode 100644
index 0000000..4b8d3dd
--- /dev/null
+++ b/diffusion_rs_backend/README.md
@@ -0,0 +1,3 @@
+# `diffusion_rs_backend`
+
+Backend for quantization in diffusion-rs.
\ No newline at end of file
diff --git a/diffuse_rs_backend/build.rs b/diffusion_rs_backend/build.rs
similarity index 100%
rename from diffuse_rs_backend/build.rs
rename to diffusion_rs_backend/build.rs
diff --git a/diffuse_rs_backend/kernels/bitsandbytes/dequant.cu b/diffusion_rs_backend/kernels/bitsandbytes/dequant.cu
similarity index 100%
rename from diffuse_rs_backend/kernels/bitsandbytes/dequant.cu
rename to diffusion_rs_backend/kernels/bitsandbytes/dequant.cu
diff --git a/diffuse_rs_backend/src/bitsandbytes/ffi.rs b/diffusion_rs_backend/src/bitsandbytes/ffi.rs
similarity index 97%
rename from diffuse_rs_backend/src/bitsandbytes/ffi.rs
rename to diffusion_rs_backend/src/bitsandbytes/ffi.rs
index d7f393d..df6da23 100644
--- a/diffuse_rs_backend/src/bitsandbytes/ffi.rs
+++ b/diffusion_rs_backend/src/bitsandbytes/ffi.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::cuda::cudarc::driver::sys::CUstream;
+use diffusion_rs_common::core::cuda::cudarc::driver::sys::CUstream;
use half::{bf16, f16};
#[allow(dead_code)]
diff --git a/diffuse_rs_backend/src/bitsandbytes/mod.rs b/diffusion_rs_backend/src/bitsandbytes/mod.rs
similarity index 90%
rename from diffuse_rs_backend/src/bitsandbytes/mod.rs
rename to diffusion_rs_backend/src/bitsandbytes/mod.rs
index e149b1e..02ee1e3 100644
--- a/diffuse_rs_backend/src/bitsandbytes/mod.rs
+++ b/diffusion_rs_backend/src/bitsandbytes/mod.rs
@@ -1,7 +1,7 @@
use std::sync::Arc;
-use diffuse_rs_common::core::{DType, Device, Result, Shape, Tensor};
-use diffuse_rs_common::VarBuilder;
+use diffusion_rs_common::core::{DType, Device, Result, Shape, Tensor};
+use diffusion_rs_common::VarBuilder;
use serde::Deserialize;
use crate::{QuantMethod, QuantMethodConfig};
@@ -117,7 +117,7 @@ impl BnbLinear {
{
Self::linear_4bit(in_dim, out_dim, bias, vb)
} else {
- diffuse_rs_common::bail!("`BnbLinear` expects fp4/nf4 or int8 layers.");
+ diffusion_rs_common::bail!("`BnbLinear` expects fp4/nf4 or int8 layers.");
}
}
@@ -142,7 +142,7 @@ impl BnbLinear {
if !vb_w.contains_tensor("quant_state.bitsandbytes__nf4")
&& !vb_w.contains_tensor("quant_state.bitsandbytes__fp4")
{
- diffuse_rs_common::bail!("`BnbLinear` expects either `...__nf4` or `...__fp4` tensors, this means the layer is not 4bit or 8bit.");
+ diffusion_rs_common::bail!("`BnbLinear` expects either `...__nf4` or `...__fp4` tensors, this means the layer is not 4bit or 8bit.");
}
let quant_ty = if vb_w.contains_tensor("quant_state.bitsandbytes__nf4") {
@@ -163,29 +163,27 @@ impl BnbLinear {
BnbQuantType::Int8 => None,
};
let Some(state) = state else {
- diffuse_rs_common::bail!("Only fp8/nf4 quantization is supported for now.")
+ diffusion_rs_common::bail!("Only fp8/nf4 quantization is supported for now.")
};
let state_str = String::from_utf8(state.to_vec1::<u8>()?)?;
let state: BnbQuantState =
- serde_json::from_str(&state_str).map_err(diffuse_rs_common::core::Error::msg)?;
+ serde_json::from_str(&state_str).map_err(diffusion_rs_common::core::Error::msg)?;
let nested = if vb_w.contains_tensor("nested_absmax") {
// TODO: can `nested_blocksize` be None, default to 64 like bnb?
Some(Arc::new(BnbQuantParmas {
absmax: vb_w.get_unchecked_dtype("nested_absmax", DType::F32)?,
code: vb_w.get_unchecked_dtype("nested_quant_map", DType::F32)?,
- blocksize: state
- .nested_blocksize
- .ok_or(diffuse_rs_common::core::Error::debug(
- "`nested_blocksize` must be present.",
- ))?,
+ blocksize: state.nested_blocksize.ok_or(
+ diffusion_rs_common::core::Error::debug("`nested_blocksize` must be present."),
+ )?,
shape: None,
nested: None,
offset: None, // Put it in the outer one!
dtype: state
.nested_dtype
- .ok_or(diffuse_rs_common::core::Error::debug(
+ .ok_or(diffusion_rs_common::core::Error::debug(
"`nested_dtype` must be present.",
))?,
}))
@@ -233,16 +231,18 @@ impl BnbLinear {
if let Some(nested) = &params.nested {
absmax = Self::dequantize_4bit(¶ms.absmax, nested, BnbQuantType::Int8)?;
absmax = (absmax
- + params.offset.ok_or(diffuse_rs_common::core::Error::debug(
- "`offset` must be present.",
- ))?)?;
+ + params
+ .offset
+ .ok_or(diffusion_rs_common::core::Error::debug(
+ "`offset` must be present.",
+ ))?)?;
}
let out_shape = params.shape.clone().unwrap_or(input.shape().clone());
let out_dtype: DType = params.dtype.into();
if !SUPPORTED_BLOCKSIZE.contains(&params.blocksize) {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"Blocksize of {} is not supported, {SUPPORTED_BLOCKSIZE:?} are.",
params.blocksize
);
@@ -262,7 +262,7 @@ impl BnbLinear {
}
impl QuantMethod for BnbLinear {
- fn new(method: QuantMethodConfig) -> diffuse_rs_common::core::Result<Self>
+ fn new(method: QuantMethodConfig) -> diffusion_rs_common::core::Result<Self>
where
Self: Sized,
{
diff --git a/diffuse_rs_backend/src/bitsandbytes/op.rs b/diffusion_rs_backend/src/bitsandbytes/op.rs
similarity index 82%
rename from diffuse_rs_backend/src/bitsandbytes/op.rs
rename to diffusion_rs_backend/src/bitsandbytes/op.rs
index 2fda5a9..426793f 100644
--- a/diffuse_rs_backend/src/bitsandbytes/op.rs
+++ b/diffusion_rs_backend/src/bitsandbytes/op.rs
@@ -3,12 +3,12 @@
use std::fmt::Debug;
#[cfg(feature = "cuda")]
-use diffuse_rs_common::core::cuda::{
+use diffusion_rs_common::core::cuda::{
cudarc::driver::{sys::CUstream, CudaSlice, CudaView, DeviceRepr, ValidAsZeroBits},
CudaDevice,
};
-use diffuse_rs_common::core::{
+use diffusion_rs_common::core::{
backend::BackendStorage, CpuStorage, CustomOp2, CustomOp3, DType, Result, Shape, Tensor,
WithDType,
};
@@ -209,7 +209,7 @@ impl DequantizeOp {
dev: &CudaDevice,
kernel: unsafe extern "C" fn(*const f32, *const u8, *const f32, *mut T, i32, i32, CUstream),
) -> Result<CudaSlice<T>> {
- use diffuse_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr};
+ use diffusion_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr};
let out = unsafe { dev.alloc::<T>(self.shape.elem_count()).w()? };
unsafe {
@@ -236,14 +236,14 @@ impl CustomOp3 for DequantizeOp {
fn cpu_fwd(
&self,
input_s: &CpuStorage,
- input_l: &diffuse_rs_common::core::Layout,
+ input_l: &diffusion_rs_common::core::Layout,
absmax_s: &CpuStorage,
- absmax_l: &diffuse_rs_common::core::Layout,
+ absmax_l: &diffusion_rs_common::core::Layout,
code_s: &CpuStorage,
- code_l: &diffuse_rs_common::core::Layout,
- ) -> diffuse_rs_common::core::Result<(CpuStorage, diffuse_rs_common::core::Shape)> {
+ code_l: &diffusion_rs_common::core::Layout,
+ ) -> diffusion_rs_common::core::Result<(CpuStorage, diffusion_rs_common::core::Shape)> {
if !(input_l.is_contiguous() && absmax_l.is_contiguous() && code_l.is_contiguous()) {
- diffuse_rs_common::bail!("All inputs must be contiguous");
+ diffusion_rs_common::bail!("All inputs must be contiguous");
}
match (input_s, absmax_s, code_s, self.out_ty) {
(
@@ -273,7 +273,7 @@ impl CustomOp3 for DequantizeOp {
CpuStorage::F32(self.dequantize_cpu(input, absmax, code, self.quant_ty)),
self.shape.clone(),
)),
- (i, a, c, t) => diffuse_rs_common::bail!(
+ (i, a, c, t) => diffusion_rs_common::bail!(
"Unsupported dtypes for cpu dequant: {:?} input, {:?} absmax, {:?} code, {:?} out",
i.dtype(),
a.dtype(),
@@ -286,15 +286,15 @@ impl CustomOp3 for DequantizeOp {
#[cfg(feature = "cuda")]
fn cuda_fwd(
&self,
- input_s: &diffuse_rs_common::core::CudaStorage,
- input_l: &diffuse_rs_common::core::Layout,
- absmax_s: &diffuse_rs_common::core::CudaStorage,
- absmax_l: &diffuse_rs_common::core::Layout,
- code_s: &diffuse_rs_common::core::CudaStorage,
- code_l: &diffuse_rs_common::core::Layout,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ input_s: &diffusion_rs_common::core::CudaStorage,
+ input_l: &diffusion_rs_common::core::Layout,
+ absmax_s: &diffusion_rs_common::core::CudaStorage,
+ absmax_l: &diffusion_rs_common::core::Layout,
+ code_s: &diffusion_rs_common::core::CudaStorage,
+ code_l: &diffusion_rs_common::core::Layout,
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
if !(input_l.is_contiguous() && absmax_l.is_contiguous() && code_l.is_contiguous()) {
- diffuse_rs_common::bail!("All inputs must be contiguous");
+ diffusion_rs_common::bail!("All inputs must be contiguous");
}
let input_slice = input_s
.as_cuda_slice::<u8>()?
@@ -308,7 +308,7 @@ impl CustomOp3 for DequantizeOp {
let dev = input_s.device().clone();
let out = match (self.out_ty, self.quant_ty) {
(BnbDType::F32, BnbQuantType::Nf4) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<f32>(
input_slice,
code_slice,
@@ -320,7 +320,7 @@ impl CustomOp3 for DequantizeOp {
)
}
(BnbDType::F16, BnbQuantType::Nf4) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<f16>(
input_slice,
code_slice,
@@ -332,7 +332,7 @@ impl CustomOp3 for DequantizeOp {
)
}
(BnbDType::BF16, BnbQuantType::Nf4) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<bf16>(
input_slice,
code_slice,
@@ -345,7 +345,7 @@ impl CustomOp3 for DequantizeOp {
}
(BnbDType::F32, BnbQuantType::Fp4) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<f32>(
input_slice,
code_slice,
@@ -357,7 +357,7 @@ impl CustomOp3 for DequantizeOp {
)
}
(BnbDType::F16, BnbQuantType::Fp4) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<f16>(
input_slice,
code_slice,
@@ -369,7 +369,7 @@ impl CustomOp3 for DequantizeOp {
)
}
(BnbDType::BF16, BnbQuantType::Fp4) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<bf16>(
input_slice,
code_slice,
@@ -382,7 +382,7 @@ impl CustomOp3 for DequantizeOp {
}
(BnbDType::F32, BnbQuantType::Int8) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<f32>(
input_slice,
code_slice,
@@ -394,7 +394,7 @@ impl CustomOp3 for DequantizeOp {
)
}
(BnbDType::F16, BnbQuantType::Int8) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<f16>(
input_slice,
code_slice,
@@ -406,7 +406,7 @@ impl CustomOp3 for DequantizeOp {
)
}
(BnbDType::BF16, BnbQuantType::Int8) => {
- diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel::<bf16>(
input_slice,
code_slice,
@@ -425,17 +425,17 @@ impl CustomOp3 for DequantizeOp {
#[cfg(feature = "metal")]
fn metal_fwd(
&self,
- input_s: &diffuse_rs_common::core::MetalStorage,
- input_l: &diffuse_rs_common::core::Layout,
- absmax_s: &diffuse_rs_common::core::MetalStorage,
- absmax_l: &diffuse_rs_common::core::Layout,
- code_s: &diffuse_rs_common::core::MetalStorage,
- code_l: &diffuse_rs_common::core::Layout,
- ) -> Result<(diffuse_rs_common::core::MetalStorage, Shape)> {
- use diffuse_rs_common::core::DType;
+ input_s: &diffusion_rs_common::core::MetalStorage,
+ input_l: &diffusion_rs_common::core::Layout,
+ absmax_s: &diffusion_rs_common::core::MetalStorage,
+ absmax_l: &diffusion_rs_common::core::Layout,
+ code_s: &diffusion_rs_common::core::MetalStorage,
+ code_l: &diffusion_rs_common::core::Layout,
+ ) -> Result<(diffusion_rs_common::core::MetalStorage, Shape)> {
+ use diffusion_rs_common::core::DType;
if !(input_l.is_contiguous() && absmax_l.is_contiguous() && code_l.is_contiguous()) {
- diffuse_rs_common::bail!("All inputs must be contiguous");
+ diffusion_rs_common::bail!("All inputs must be contiguous");
}
let command_buffer = input_s.device().command_buffer()?;
@@ -450,13 +450,13 @@ impl CustomOp3 for DequantizeOp {
)?;
if input_s.dtype() != DType::U8 {
- diffuse_rs_common::bail!("input must be u8");
+ diffusion_rs_common::bail!("input must be u8");
}
if code_s.dtype() != DType::F32 {
- diffuse_rs_common::bail!("code must be f32");
+ diffusion_rs_common::bail!("code must be f32");
}
if absmax_s.dtype() != DType::F32 {
- diffuse_rs_common::bail!("absmax must be f32");
+ diffusion_rs_common::bail!("absmax must be f32");
}
match self.quant_ty {
@@ -475,7 +475,7 @@ impl CustomOp3 for DequantizeOp {
self.blocksize,
self.n,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?,
+ .map_err(diffusion_rs_common::core::Error::wrap)?,
BnbQuantType::Fp4 => crate::metal_kernels::call_dequant_bnb_fp4(
device.device(),
&command_buffer,
@@ -491,7 +491,7 @@ impl CustomOp3 for DequantizeOp {
self.blocksize,
self.n,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?,
+ .map_err(diffusion_rs_common::core::Error::wrap)?,
BnbQuantType::Int8 => crate::metal_kernels::call_dequant_bnb_int8(
device.device(),
&command_buffer,
@@ -507,10 +507,10 @@ impl CustomOp3 for DequantizeOp {
self.blocksize,
self.n,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?,
+ .map_err(diffusion_rs_common::core::Error::wrap)?,
};
- let newstorage = diffuse_rs_common::core::MetalStorage::new(
+ let newstorage = diffusion_rs_common::core::MetalStorage::new(
output,
device.clone(),
self.shape.elem_count(),
@@ -574,7 +574,7 @@ impl Dequantize8BitOp {
dev: &CudaDevice,
kernel: unsafe extern "C" fn(*const i8, *const f32, *mut T, i32, i32, i32),
) -> Result<CudaSlice<T>> {
- use diffuse_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr};
+ use diffusion_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr};
let out = unsafe { dev.alloc::<T>(n as usize).w()? };
unsafe {
@@ -600,19 +600,19 @@ impl CustomOp2 for Dequantize8BitOp {
fn cpu_fwd(
&self,
weight_s: &CpuStorage,
- weight_l: &diffuse_rs_common::core::Layout,
+ weight_l: &diffusion_rs_common::core::Layout,
scb_s: &CpuStorage,
- scb_l: &diffuse_rs_common::core::Layout,
- ) -> diffuse_rs_common::core::Result<(CpuStorage, diffuse_rs_common::core::Shape)> {
+ scb_l: &diffusion_rs_common::core::Layout,
+ ) -> diffusion_rs_common::core::Result<(CpuStorage, diffusion_rs_common::core::Shape)> {
if !(weight_l.is_contiguous() && scb_l.is_contiguous()) {
- diffuse_rs_common::bail!("All inputs must be contiguous");
+ diffusion_rs_common::bail!("All inputs must be contiguous");
}
let row = weight_l.dim(0)?;
let col = weight_l.dim(1)?;
if row != scb_l.dim(0)? {
- diffuse_rs_common::bail!("scb dim0 must match weight dim0");
+ diffusion_rs_common::bail!("scb dim0 must match weight dim0");
}
match (weight_s, scb_s, self.out_ty) {
@@ -628,7 +628,7 @@ impl CustomOp2 for Dequantize8BitOp {
CpuStorage::F32(self.dequantize_cpu(weight, scb, col)),
weight_l.shape().clone(),
)),
- (w, s, t) => diffuse_rs_common::bail!(
+ (w, s, t) => diffusion_rs_common::bail!(
"Unsupported dtypes for cpu dequant: {:?} weight, {:?} scb, {:?} out",
w.dtype(),
s.dtype(),
@@ -640,13 +640,13 @@ impl CustomOp2 for Dequantize8BitOp {
#[cfg(feature = "cuda")]
fn cuda_fwd(
&self,
- weight_s: &diffuse_rs_common::core::CudaStorage,
- weight_l: &diffuse_rs_common::core::Layout,
- scb_s: &diffuse_rs_common::core::CudaStorage,
- scb_l: &diffuse_rs_common::core::Layout,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ weight_s: &diffusion_rs_common::core::CudaStorage,
+ weight_l: &diffusion_rs_common::core::Layout,
+ scb_s: &diffusion_rs_common::core::CudaStorage,
+ scb_l: &diffusion_rs_common::core::Layout,
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
if !(weight_l.is_contiguous() && scb_l.is_contiguous()) {
- diffuse_rs_common::bail!("All inputs must be contiguous");
+ diffusion_rs_common::bail!("All inputs must be contiguous");
}
let weight_slice = weight_s
.as_cuda_slice::<i8>()?
@@ -659,11 +659,11 @@ impl CustomOp2 for Dequantize8BitOp {
let n = weight_l.shape().elem_count() as i32;
if row != scb_l.dim(0)? as i32 {
- diffuse_rs_common::bail!("scb dim0 must match weight dim0");
+ diffusion_rs_common::bail!("scb dim0 must match weight dim0");
}
let out = match self.out_ty {
- DType::F32 => diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ DType::F32 => diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel(
weight_slice,
scb_slice,
@@ -675,7 +675,7 @@ impl CustomOp2 for Dequantize8BitOp {
)?,
dev,
),
- DType::F16 => diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ DType::F16 => diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel(
weight_slice,
scb_slice,
@@ -687,7 +687,7 @@ impl CustomOp2 for Dequantize8BitOp {
)?,
dev,
),
- DType::BF16 => diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(
+ DType::BF16 => diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(
self.dispatch_cuda_kernel(
weight_slice,
scb_slice,
@@ -699,7 +699,7 @@ impl CustomOp2 for Dequantize8BitOp {
)?,
dev,
),
- _ => diffuse_rs_common::bail!("only f32/bf16/f16 are allowed in dequantize-8bit-op"),
+ _ => diffusion_rs_common::bail!("only f32/bf16/f16 are allowed in dequantize-8bit-op"),
};
Ok((out, weight_l.shape().clone()))
@@ -708,15 +708,15 @@ impl CustomOp2 for Dequantize8BitOp {
#[cfg(feature = "metal")]
fn metal_fwd(
&self,
- weight_s: &diffuse_rs_common::core::MetalStorage,
- weight_l: &diffuse_rs_common::core::Layout,
- scb_s: &diffuse_rs_common::core::MetalStorage,
- scb_l: &diffuse_rs_common::core::Layout,
- ) -> Result<(diffuse_rs_common::core::MetalStorage, Shape)> {
- use diffuse_rs_common::core::DType;
+ weight_s: &diffusion_rs_common::core::MetalStorage,
+ weight_l: &diffusion_rs_common::core::Layout,
+ scb_s: &diffusion_rs_common::core::MetalStorage,
+ scb_l: &diffusion_rs_common::core::Layout,
+ ) -> Result<(diffusion_rs_common::core::MetalStorage, Shape)> {
+ use diffusion_rs_common::core::DType;
if !(weight_l.is_contiguous() && scb_l.is_contiguous()) {
- diffuse_rs_common::bail!("All inputs must be contiguous");
+ diffusion_rs_common::bail!("All inputs must be contiguous");
}
let command_buffer = weight_s.device().command_buffer()?;
@@ -731,14 +731,14 @@ impl CustomOp2 for Dequantize8BitOp {
let output = device.new_buffer(n, self.out_ty, "dequant-8bit-bnb")?;
if weight_s.dtype() != DType::I8 {
- diffuse_rs_common::bail!("input must be i8");
+ diffusion_rs_common::bail!("input must be i8");
}
if scb_s.dtype() != DType::F32 {
- diffuse_rs_common::bail!("scb must be f32");
+ diffusion_rs_common::bail!("scb must be f32");
}
if row != scb_l.dim(0)? {
- diffuse_rs_common::bail!("scb dim0 must match weight dim0");
+ diffusion_rs_common::bail!("scb dim0 must match weight dim0");
}
crate::metal_kernels::call_dequant_bnb_8bit(
@@ -755,10 +755,10 @@ impl CustomOp2 for Dequantize8BitOp {
col,
n,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?;
+ .map_err(diffusion_rs_common::core::Error::wrap)?;
let newstorage =
- diffuse_rs_common::core::MetalStorage::new(output, device.clone(), n, self.out_ty);
+ diffusion_rs_common::core::MetalStorage::new(output, device.clone(), n, self.out_ty);
Ok((newstorage, weight_l.shape().clone()))
}
}
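
For orientation, the row-wise scheme behind these `Dequantize8BitOp` kernels can be sketched on the CPU. This is a minimal illustration, assuming the usual bitsandbytes int8 convention `w = w_i8 * scb[row] / 127`; the `dequantize_8bit` helper below is hypothetical and not part of the crate:

```rust
/// Minimal CPU sketch of bitsandbytes-style 8-bit dequantization.
/// Assumption: each row of the i8 weight matrix is rescaled by its
/// per-row absmax scale, scb[row] / 127.0.
fn dequantize_8bit(weight: &[i8], scb: &[f32], rows: usize, cols: usize) -> Vec<f32> {
    assert_eq!(weight.len(), rows * cols);
    assert_eq!(scb.len(), rows);
    let mut out = Vec::with_capacity(rows * cols);
    for r in 0..rows {
        // Recover the per-row scale from the stored absmax vector.
        let scale = scb[r] / 127.0;
        for c in 0..cols {
            out.push(weight[r * cols + c] as f32 * scale);
        }
    }
    out
}
```

This also shows why the op bails when `scb.dim(0)` differs from `weight.dim(0)`: `scb` must carry exactly one scale per weight row.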
diff --git a/diffuse_rs_backend/src/cublaslt/api.rs b/diffusion_rs_backend/src/cublaslt/api.rs
similarity index 70%
rename from diffuse_rs_backend/src/cublaslt/api.rs
rename to diffusion_rs_backend/src/cublaslt/api.rs
index e8ef14c..4146ab3 100644
--- a/diffuse_rs_backend/src/cublaslt/api.rs
+++ b/diffusion_rs_backend/src/cublaslt/api.rs
@@ -1,10 +1,12 @@
-use diffuse_rs_common::core::cuda::cudarc::driver::DevicePtr;
+use diffusion_rs_common::core::cuda::cudarc::driver::DevicePtr;
use float8::F8E4M3;
use std::ffi::c_int;
-use diffuse_rs_common::core::backend::BackendStorage;
-use diffuse_rs_common::core::cuda_backend::WrapErr;
-use diffuse_rs_common::core::{CpuStorage, DType, Device, Layout, Result, Shape, Storage, Tensor};
+use diffusion_rs_common::core::backend::BackendStorage;
+use diffusion_rs_common::core::cuda_backend::WrapErr;
+use diffusion_rs_common::core::{
+ CpuStorage, DType, Device, Layout, Result, Shape, Storage, Tensor,
+};
use half::{bf16, f16};
use std::sync::Arc;
@@ -18,7 +20,7 @@ impl CublasLt {
pub fn new(device: &Device) -> Result<Self> {
let dev = match device {
Device::Cuda(d) => d,
- _ => diffuse_rs_common::bail!("`device` must be a `cuda` device"),
+ _ => diffusion_rs_common::bail!("`device` must be a `cuda` device"),
};
let inner = CudaBlasLT::new(dev.cuda_device()).unwrap();
@@ -38,13 +40,13 @@ pub struct CublasLTBatchMatmul {
impl CublasLTBatchMatmul {
pub fn fwd_f16(
&self,
- a: &diffuse_rs_common::core::CudaStorage,
+ a: &diffusion_rs_common::core::CudaStorage,
a_l: &Layout,
- b: &diffuse_rs_common::core::CudaStorage,
+ b: &diffusion_rs_common::core::CudaStorage,
b_l: &Layout,
- bias: Option<&diffuse_rs_common::core::CudaStorage>,
+ bias: Option<&diffusion_rs_common::core::CudaStorage>,
bias_l: Option<&Layout>,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
let dev = a.device();
// Assume TN
@@ -52,11 +54,11 @@ impl CublasLTBatchMatmul {
let (b_0, n, b_2) = b_l.shape().dims3()?;
if b_2 != k {
- diffuse_rs_common::bail!("This layer only supports TN layout");
+ diffusion_rs_common::bail!("This layer only supports TN layout");
}
if b_0 != batch_size {
- diffuse_rs_common::bail!("`b` must have the same batch size as `a`")
+ diffusion_rs_common::bail!("`b` must have the same batch size as `a`")
}
let lda = k;
@@ -70,7 +72,7 @@ impl CublasLTBatchMatmul {
let bias = if let (Some(bias), Some(bias_l)) = (bias, bias_l) {
if bias_l.shape().dims1()? != m {
- diffuse_rs_common::bail!("Bias does not have the correct shape");
+ diffusion_rs_common::bail!("Bias does not have the correct shape");
}
Some(bias.as_cuda_slice::<f16>()?.slice(bias_l.start_offset()..))
@@ -82,25 +84,25 @@ impl CublasLTBatchMatmul {
let (c, c_l) = c.storage_and_layout();
let c = match &*c {
Storage::Cuda(storage) => storage.as_cuda_slice::<f16>()?,
- _ => diffuse_rs_common::bail!("`c` must be a cuda tensor"),
+ _ => diffusion_rs_common::bail!("`c` must be a cuda tensor"),
};
match c_l.contiguous_offsets() {
Some((o1, o2)) => {
if o1 != 0 {
- diffuse_rs_common::bail!("`c` start offset must be 0");
+ diffusion_rs_common::bail!("`c` start offset must be 0");
}
if o2 != out_shape.elem_count() {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"`c` end offset must be {}",
out_shape.elem_count()
)
}
}
- None => diffuse_rs_common::bail!("`c` has to be contiguous"),
+ None => diffusion_rs_common::bail!("`c` has to be contiguous"),
};
if c_l.shape().dims3()? != (batch_size, n, m) {
- diffuse_rs_common::bail!("`c` does not have the correct shape");
+ diffusion_rs_common::bail!("`c` does not have the correct shape");
}
// Set beta to 0.0 if it is not set
@@ -134,23 +136,23 @@ impl CublasLTBatchMatmul {
unsafe {
self.cublaslt
.matmul(config, &a, &b, &mut out, bias.as_ref(), self.act.as_ref())
- .map_err(|e| diffuse_rs_common::core::Error::Cuda(Box::new(e)))?;
+ .map_err(|e| diffusion_rs_common::core::Error::Cuda(Box::new(e)))?;
}
- let out = diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone());
+ let out = diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone());
Ok((out, out_shape))
}
pub fn fwd_bf16(
&self,
- a: &diffuse_rs_common::core::CudaStorage,
+ a: &diffusion_rs_common::core::CudaStorage,
a_l: &Layout,
- b: &diffuse_rs_common::core::CudaStorage,
+ b: &diffusion_rs_common::core::CudaStorage,
b_l: &Layout,
- bias: Option<&diffuse_rs_common::core::CudaStorage>,
+ bias: Option<&diffusion_rs_common::core::CudaStorage>,
bias_l: Option<&Layout>,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
let dev = a.device();
// Assume TN
@@ -158,11 +160,11 @@ impl CublasLTBatchMatmul {
let (b_0, n, b_2) = b_l.shape().dims3()?;
if b_2 != k {
- diffuse_rs_common::bail!("This layer only supports TN layout");
+ diffusion_rs_common::bail!("This layer only supports TN layout");
}
if b_0 != batch_size {
- diffuse_rs_common::bail!("`b` must have the same batch size as `a`")
+ diffusion_rs_common::bail!("`b` must have the same batch size as `a`")
}
let lda = k;
@@ -176,7 +178,7 @@ impl CublasLTBatchMatmul {
let bias = if let (Some(bias), Some(bias_l)) = (bias, bias_l) {
if bias_l.shape().dims1()? != m {
- diffuse_rs_common::bail!("Bias does not have the correct shape");
+ diffusion_rs_common::bail!("Bias does not have the correct shape");
}
Some(bias.as_cuda_slice::<bf16>()?.slice(bias_l.start_offset()..))
@@ -188,25 +190,25 @@ impl CublasLTBatchMatmul {
let (c, c_l) = c.storage_and_layout();
let c = match &*c {
Storage::Cuda(storage) => storage.as_cuda_slice::<bf16>()?,
- _ => diffuse_rs_common::bail!("`c` must be a cuda tensor"),
+ _ => diffusion_rs_common::bail!("`c` must be a cuda tensor"),
};
match c_l.contiguous_offsets() {
Some((o1, o2)) => {
if o1 != 0 {
- diffuse_rs_common::bail!("`c` start offset must be 0");
+ diffusion_rs_common::bail!("`c` start offset must be 0");
}
if o2 != out_shape.elem_count() {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"`c` end offset must be {}",
out_shape.elem_count()
)
}
}
- None => diffuse_rs_common::bail!("`c` has to be contiguous"),
+ None => diffusion_rs_common::bail!("`c` has to be contiguous"),
};
if c_l.shape().dims3()? != (batch_size, n, m) {
- diffuse_rs_common::bail!("`c` does not have the correct shape");
+ diffusion_rs_common::bail!("`c` does not have the correct shape");
}
// Set beta to 0.0 if it is not set
@@ -240,23 +242,23 @@ impl CublasLTBatchMatmul {
unsafe {
self.cublaslt
.matmul(config, &a, &b, &mut out, bias.as_ref(), self.act.as_ref())
- .map_err(|e| diffuse_rs_common::core::Error::Cuda(Box::new(e)))?;
+ .map_err(|e| diffusion_rs_common::core::Error::Cuda(Box::new(e)))?;
}
- let out = diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone());
+ let out = diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone());
Ok((out, out_shape))
}
pub fn fwd_f32(
&self,
- a: &diffuse_rs_common::core::CudaStorage,
+ a: &diffusion_rs_common::core::CudaStorage,
a_l: &Layout,
- b: &diffuse_rs_common::core::CudaStorage,
+ b: &diffusion_rs_common::core::CudaStorage,
b_l: &Layout,
- bias: Option<&diffuse_rs_common::core::CudaStorage>,
+ bias: Option<&diffusion_rs_common::core::CudaStorage>,
bias_l: Option<&Layout>,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
let dev = a.device();
// Assume TN
@@ -264,11 +266,11 @@ impl CublasLTBatchMatmul {
let (b_0, n, b_2) = b_l.shape().dims3()?;
if b_2 != k {
- diffuse_rs_common::bail!("This layer only supports TN layout");
+ diffusion_rs_common::bail!("This layer only supports TN layout");
}
if b_0 != batch_size {
- diffuse_rs_common::bail!("`b` must have the same batch size as `a`")
+ diffusion_rs_common::bail!("`b` must have the same batch size as `a`")
}
let lda = k;
@@ -282,7 +284,7 @@ impl CublasLTBatchMatmul {
let bias = if let (Some(bias), Some(bias_l)) = (bias, bias_l) {
if bias_l.shape().dims1()? != m {
- diffuse_rs_common::bail!("Bias does not have the correct shape");
+ diffusion_rs_common::bail!("Bias does not have the correct shape");
}
Some(bias.as_cuda_slice::<f32>()?.slice(bias_l.start_offset()..))
@@ -294,25 +296,25 @@ impl CublasLTBatchMatmul {
let (c, c_l) = c.storage_and_layout();
let c = match &*c {
Storage::Cuda(storage) => storage.as_cuda_slice::<f32>()?,
- _ => diffuse_rs_common::bail!("`c` must be a cuda tensor"),
+ _ => diffusion_rs_common::bail!("`c` must be a cuda tensor"),
};
match c_l.contiguous_offsets() {
Some((o1, o2)) => {
if o1 != 0 {
- diffuse_rs_common::bail!("`c` start offset must be 0");
+ diffusion_rs_common::bail!("`c` start offset must be 0");
}
if o2 != out_shape.elem_count() {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"`c` end offset must be {}",
out_shape.elem_count()
)
}
}
- None => diffuse_rs_common::bail!("`c` has to be contiguous"),
+ None => diffusion_rs_common::bail!("`c` has to be contiguous"),
};
if c_l.shape().dims3()? != (batch_size, n, m) {
- diffuse_rs_common::bail!("`c` does not have the correct shape");
+ diffusion_rs_common::bail!("`c` does not have the correct shape");
}
// Set beta to 0.0 if it is not set
@@ -346,16 +348,16 @@ impl CublasLTBatchMatmul {
unsafe {
self.cublaslt
.matmul(config, &a, &b, &mut out, bias.as_ref(), self.act.as_ref())
- .map_err(|e| diffuse_rs_common::core::Error::Cuda(Box::new(e)))?;
+ .map_err(|e| diffusion_rs_common::core::Error::Cuda(Box::new(e)))?;
}
- let out = diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone());
+ let out = diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone());
Ok((out, out_shape))
}
}
-impl diffuse_rs_common::core::CustomOp2 for CublasLTBatchMatmul {
+impl diffusion_rs_common::core::CustomOp2 for CublasLTBatchMatmul {
fn name(&self) -> &'static str {
"cublaslt-batch-matmul"
}
@@ -367,22 +369,22 @@ impl diffuse_rs_common::core::CustomOp2 for CublasLTBatchMatmul {
_: &CpuStorage,
_: &Layout,
) -> Result<(CpuStorage, Shape)> {
- diffuse_rs_common::bail!("no cpu support for cublaslt-batch-matmul")
+ diffusion_rs_common::bail!("no cpu support for cublaslt-batch-matmul")
}
fn cuda_fwd(
&self,
- a: &diffuse_rs_common::core::CudaStorage,
+ a: &diffusion_rs_common::core::CudaStorage,
a_l: &Layout,
- b: &diffuse_rs_common::core::CudaStorage,
+ b: &diffusion_rs_common::core::CudaStorage,
b_l: &Layout,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
match a.dtype() {
- diffuse_rs_common::core::DType::F16 => self.fwd_f16(a, a_l, b, b_l, None, None),
- diffuse_rs_common::core::DType::BF16 => self.fwd_bf16(a, a_l, b, b_l, None, None),
- diffuse_rs_common::core::DType::F32 => self.fwd_f32(a, a_l, b, b_l, None, None),
+ diffusion_rs_common::core::DType::F16 => self.fwd_f16(a, a_l, b, b_l, None, None),
+ diffusion_rs_common::core::DType::BF16 => self.fwd_bf16(a, a_l, b, b_l, None, None),
+ diffusion_rs_common::core::DType::F32 => self.fwd_f32(a, a_l, b, b_l, None, None),
dt => {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"cublaslt-batch-matmul is only supported for f16/bf16/f32 ({dt:?})"
)
}
@@ -390,7 +392,7 @@ impl diffuse_rs_common::core::CustomOp2 for CublasLTBatchMatmul {
}
}
-impl diffuse_rs_common::core::CustomOp3 for CublasLTBatchMatmul {
+impl diffusion_rs_common::core::CustomOp3 for CublasLTBatchMatmul {
fn name(&self) -> &'static str {
"cublaslt-batch-matmul-add"
}
@@ -404,29 +406,29 @@ impl diffuse_rs_common::core::CustomOp3 for CublasLTBatchMatmul {
_: &CpuStorage,
_: &Layout,
) -> Result<(CpuStorage, Shape)> {
- diffuse_rs_common::bail!("no cpu support for cublaslt-batch-matmul-add")
+ diffusion_rs_common::bail!("no cpu support for cublaslt-batch-matmul-add")
}
fn cuda_fwd(
&self,
- a: &diffuse_rs_common::core::CudaStorage,
+ a: &diffusion_rs_common::core::CudaStorage,
a_l: &Layout,
- b: &diffuse_rs_common::core::CudaStorage,
+ b: &diffusion_rs_common::core::CudaStorage,
b_l: &Layout,
- bias: &diffuse_rs_common::core::CudaStorage,
+ bias: &diffusion_rs_common::core::CudaStorage,
bias_l: &Layout,
- ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> {
+ ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> {
match a.dtype() {
- diffuse_rs_common::core::DType::F16 => {
+ diffusion_rs_common::core::DType::F16 => {
self.fwd_f16(a, a_l, b, b_l, Some(bias), Some(bias_l))
}
- diffuse_rs_common::core::DType::BF16 => {
+ diffusion_rs_common::core::DType::BF16 => {
self.fwd_bf16(a, a_l, b, b_l, Some(bias), Some(bias_l))
}
- diffuse_rs_common::core::DType::F32 => {
+ diffusion_rs_common::core::DType::F32 => {
self.fwd_f32(a, a_l, b, b_l, Some(bias), Some(bias_l))
}
- dt => diffuse_rs_common::bail!(
+ dt => diffusion_rs_common::bail!(
"cublaslt-batch-matmul-add is only supported for f16/bf16/f32 ({dt:?})"
),
}
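
All three `fwd_*` helpers above enforce the same TN contract before calling into cuBLASLt: `a` is `(batch, m, k)`, `b` is `(batch, n, k)` (i.e. the second operand is stored transposed), and the `c`/output buffer is `(batch, n, m)`. A shape-only sketch of that validation, as a hypothetical standalone function rather than the crate's API:

```rust
/// Shape-only sketch of the TN-layout checks shared by fwd_f16/fwd_bf16/fwd_f32.
/// Returns the expected output shape (batch, n, m) on success.
fn check_tn_shapes(
    a_dims: (usize, usize, usize), // (batch, m, k)
    b_dims: (usize, usize, usize), // (batch, n, k): `b` holds the transposed operand
) -> Result<(usize, usize, usize), String> {
    let (batch, m, k) = a_dims;
    let (b_0, n, b_2) = b_dims;
    if b_2 != k {
        return Err("this layer only supports TN layout".into());
    }
    if b_0 != batch {
        return Err("`b` must have the same batch size as `a`".into());
    }
    Ok((batch, n, m))
}
```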
diff --git a/diffuse_rs_backend/src/cublaslt/matmul.rs b/diffusion_rs_backend/src/cublaslt/matmul.rs
similarity index 98%
rename from diffuse_rs_backend/src/cublaslt/matmul.rs
rename to diffusion_rs_backend/src/cublaslt/matmul.rs
index afd24c3..b060156 100644
--- a/diffuse_rs_backend/src/cublaslt/matmul.rs
+++ b/diffusion_rs_backend/src/cublaslt/matmul.rs
@@ -1,11 +1,11 @@
use core::ffi::c_int;
use core::mem;
-use diffuse_rs_common::core::cuda::cudarc::cublaslt::result::set_matrix_layout_attribute;
-use diffuse_rs_common::core::cuda::cudarc::cublaslt::{result, result::CublasError, sys};
-use diffuse_rs_common::core::cuda::cudarc::driver::sys::{
+use diffusion_rs_common::core::cuda::cudarc::cublaslt::result::set_matrix_layout_attribute;
+use diffusion_rs_common::core::cuda::cudarc::cublaslt::{result, result::CublasError, sys};
+use diffusion_rs_common::core::cuda::cudarc::driver::sys::{
CUdevice_attribute, CUdeviceptr, CUstream,
};
-use diffuse_rs_common::core::cuda::cudarc::driver::{
+use diffusion_rs_common::core::cuda::cudarc::driver::{
CudaDevice, CudaSlice, DevicePtr, DevicePtrMut, DriverError,
};
use float8::F8E4M3;
diff --git a/diffuse_rs_backend/src/cublaslt/mod.rs b/diffusion_rs_backend/src/cublaslt/mod.rs
similarity index 91%
rename from diffuse_rs_backend/src/cublaslt/mod.rs
rename to diffusion_rs_backend/src/cublaslt/mod.rs
index 3faaf29..7816d6a 100644
--- a/diffuse_rs_backend/src/cublaslt/mod.rs
+++ b/diffusion_rs_backend/src/cublaslt/mod.rs
@@ -2,8 +2,8 @@
#![allow(unused_variables, unused_imports, dead_code)]
-use diffuse_rs_common::core::{Device, Result, Tensor};
-use diffuse_rs_common::nn::Activation as CandleActivation;
+use diffusion_rs_common::core::{Device, Result, Tensor};
+use diffusion_rs_common::nn::Activation as CandleActivation;
use once_cell::sync::Lazy;
use std::sync::{Mutex, Once};
@@ -38,7 +38,7 @@ pub fn maybe_init_cublas_lt_wrapper() {
// Check if we can call the driver
// Then check if we can create a device
// Then check that the device is CUDA
- use diffuse_rs_common::core::cuda_backend::cudarc::driver;
+ use diffusion_rs_common::core::cuda_backend::cudarc::driver;
CUBLASLT = driver::result::init()
.ok()
.and_then(|_| Device::cuda_if_available(0).ok())
@@ -107,13 +107,13 @@ impl CublasLtWrapper {
)?;
if Some(CandleActivation::Swiglu) == act {
- result = diffuse_rs_common::nn::ops::swiglu(&result)?;
+ result = diffusion_rs_common::nn::ops::swiglu(&result)?;
}
Ok(result)
}
#[cfg(not(feature = "cuda"))]
{
- diffuse_rs_common::bail!("`cuda` feature is not enabled")
+ diffusion_rs_common::bail!("`cuda` feature is not enabled")
}
}
}
diff --git a/diffuse_rs_backend/src/gguf/mod.rs b/diffusion_rs_backend/src/gguf/mod.rs
similarity index 93%
rename from diffuse_rs_backend/src/gguf/mod.rs
rename to diffusion_rs_backend/src/gguf/mod.rs
index ecdeb39..9cdded5 100644
--- a/diffuse_rs_backend/src/gguf/mod.rs
+++ b/diffusion_rs_backend/src/gguf/mod.rs
@@ -1,8 +1,8 @@
use std::sync::Arc;
-use diffuse_rs_common::core::Device;
-use diffuse_rs_common::core::{quantized::QMatMul, DType, Result, Tensor};
-use diffuse_rs_common::nn::Module;
+use diffusion_rs_common::core::Device;
+use diffusion_rs_common::core::{quantized::QMatMul, DType, Result, Tensor};
+use diffusion_rs_common::nn::Module;
use crate::{QuantMethod, QuantMethodConfig};
diff --git a/diffuse_rs_backend/src/lib.rs b/diffusion_rs_backend/src/lib.rs
similarity index 94%
rename from diffuse_rs_backend/src/lib.rs
rename to diffusion_rs_backend/src/lib.rs
index 603c3b0..69182da 100644
--- a/diffuse_rs_backend/src/lib.rs
+++ b/diffusion_rs_backend/src/lib.rs
@@ -3,7 +3,7 @@ use std::{
sync::Arc,
};
-use diffuse_rs_common::core::{
+use diffusion_rs_common::core::{
quantized::{GgmlDType, QTensor},
DType, Device, Result, Tensor,
};
@@ -21,8 +21,8 @@ pub use bitsandbytes::{BnbLinear, BnbQuantParmas, BnbQuantType};
pub use gguf::GgufMatMul;
pub use unquantized::UnquantLinear;
-use diffuse_rs_common::nn::{Linear, Module};
-use diffuse_rs_common::VarBuilder;
+use diffusion_rs_common::nn::{Linear, Module};
+use diffusion_rs_common::VarBuilder;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
@@ -108,7 +108,7 @@ pub enum IsqType {
}
impl TryFrom<IsqType> for GgmlDType {
- type Error = diffuse_rs_common::core::Error;
+ type Error = diffusion_rs_common::core::Error;
fn try_from(value: IsqType) -> Result<Self> {
let tp = match value {
@@ -124,7 +124,7 @@ impl TryFrom for GgmlDType {
IsqType::Q8K => Self::Q8K,
IsqType::Q8_0 => Self::Q8_0,
IsqType::Q8_1 => Self::Q8_1,
- _ => diffuse_rs_common::bail!("Expected valid GGML ISQ type."),
+ _ => diffusion_rs_common::bail!("Expected valid GGML ISQ type."),
};
#[cfg(feature = "cuda")]
{
@@ -141,7 +141,7 @@ impl TryFrom for GgmlDType {
| GgmlDType::Q5K
| GgmlDType::Q6K
) {
- diffuse_rs_common::bail!("GGML ISQ type on CUDA must be one of `Q4_0`, `Q4_1`, `Q5_0`, `Q5_1`, `Q8_0`, `Q2K`, `Q3K`, `Q4K`, `Q5K`, `Q6K`, `HQQ8`, `HQQ4`")
+ diffusion_rs_common::bail!("GGML ISQ type on CUDA must be one of `Q4_0`, `Q4_1`, `Q5_0`, `Q5_1`, `Q8_0`, `Q2K`, `Q3K`, `Q4K`, `Q5K`, `Q6K`, `HQQ8`, `HQQ4`")
}
}
Ok(tp)
diff --git a/diffuse_rs_backend/src/metal_kernels/bnb_dequantize.metal b/diffusion_rs_backend/src/metal_kernels/bnb_dequantize.metal
similarity index 100%
rename from diffuse_rs_backend/src/metal_kernels/bnb_dequantize.metal
rename to diffusion_rs_backend/src/metal_kernels/bnb_dequantize.metal
diff --git a/diffuse_rs_backend/src/metal_kernels/mod.rs b/diffusion_rs_backend/src/metal_kernels/mod.rs
similarity index 99%
rename from diffuse_rs_backend/src/metal_kernels/mod.rs
rename to diffusion_rs_backend/src/metal_kernels/mod.rs
index 8b416af..2049f4d 100644
--- a/diffuse_rs_backend/src/metal_kernels/mod.rs
+++ b/diffusion_rs_backend/src/metal_kernels/mod.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::DType;
+use diffusion_rs_common::core::DType;
use metal::{
Buffer, CompileOptions, ComputeCommandEncoderRef, ComputePipelineState, Device, Function,
FunctionConstantValues, Library, MTLSize,
diff --git a/diffuse_rs_backend/src/metal_kernels/sdpa.metal b/diffusion_rs_backend/src/metal_kernels/sdpa.metal
similarity index 100%
rename from diffuse_rs_backend/src/metal_kernels/sdpa.metal
rename to diffusion_rs_backend/src/metal_kernels/sdpa.metal
diff --git a/diffuse_rs_backend/src/metal_kernels/utils.rs b/diffusion_rs_backend/src/metal_kernels/utils.rs
similarity index 100%
rename from diffuse_rs_backend/src/metal_kernels/utils.rs
rename to diffusion_rs_backend/src/metal_kernels/utils.rs
diff --git a/diffuse_rs_backend/src/ops.rs b/diffusion_rs_backend/src/ops.rs
similarity index 84%
rename from diffuse_rs_backend/src/ops.rs
rename to diffusion_rs_backend/src/ops.rs
index 2012e41..53c070a 100644
--- a/diffuse_rs_backend/src/ops.rs
+++ b/diffusion_rs_backend/src/ops.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{CpuStorage, Layout, Result, Shape, Tensor};
+use diffusion_rs_common::core::{CpuStorage, Layout, Result, Shape, Tensor};
#[allow(dead_code)]
struct Sdpa {
@@ -6,7 +6,7 @@ struct Sdpa {
softcapping: f32,
}
-impl diffuse_rs_common::core::CustomOp3 for Sdpa {
+impl diffusion_rs_common::core::CustomOp3 for Sdpa {
fn name(&self) -> &'static str {
"metal-sdpa"
}
@@ -20,21 +20,21 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
_s3: &CpuStorage,
_l3: &Layout,
) -> Result<(CpuStorage, Shape)> {
- diffuse_rs_common::bail!("SDPA has no cpu impl")
+ diffusion_rs_common::bail!("SDPA has no cpu impl")
}
#[cfg(feature = "metal")]
fn metal_fwd(
&self,
- q: &diffuse_rs_common::core::MetalStorage,
+ q: &diffusion_rs_common::core::MetalStorage,
q_l: &Layout,
- k: &diffuse_rs_common::core::MetalStorage,
+ k: &diffusion_rs_common::core::MetalStorage,
k_l: &Layout,
- v: &diffuse_rs_common::core::MetalStorage,
+ v: &diffusion_rs_common::core::MetalStorage,
v_l: &Layout,
- ) -> Result<(diffuse_rs_common::core::MetalStorage, Shape)> {
+ ) -> Result<(diffusion_rs_common::core::MetalStorage, Shape)> {
use crate::metal_kernels::SdpaDType;
- use diffuse_rs_common::core::{backend::BackendStorage, DType, Shape, D};
+ use diffusion_rs_common::core::{backend::BackendStorage, DType, Shape, D};
let device = q.device();
@@ -45,17 +45,17 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
// q,k must have matching emb dim
if q_l.dim(D::Minus1)? != k_l.dim(D::Minus1)? {
- diffuse_rs_common::bail!("`q` and `k` last dims must match");
+ diffusion_rs_common::bail!("`q` and `k` last dims must match");
}
// k,v must have matching n kv heads
if v_l.dim(D::Minus(3))? != k_l.dim(D::Minus(3))? {
- diffuse_rs_common::bail!("`k` and `v` head dims must match");
+ diffusion_rs_common::bail!("`k` and `v` head dims must match");
}
// n_heads % n_kv_heads == 0; n_heads >= 1, n_kv_heads >= 1.
if q_l.dim(D::Minus(3))? % k_l.dim(D::Minus(3))? != 0 {
- diffuse_rs_common::bail!("query `n_heads` must be a multiple of `n_kv_heads`");
+ diffusion_rs_common::bail!("query `n_heads` must be a multiple of `n_kv_heads`");
}
let k_head = k_l.dim(D::Minus1)?;
@@ -75,7 +75,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
implementation_supports_use_case &= supports_sdpa_full || supports_sdpa_vector;
if !supported_head_dim {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"Meta SDPA does not support q head dim {q_head}: q dims {:?}, k dims {:?}, v dims {:?}.",
q_l.dims(),
k_l.dims(),
@@ -83,7 +83,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
);
}
if !implementation_supports_use_case {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"Meta SDPA does not support q dims {:?}, k dims {:?}, v dims {:?}.",
q_l.dims(),
k_l.dims(),
@@ -93,7 +93,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
for t in [k.dtype(), v.dtype()] {
if q.dtype() != t {
- diffuse_rs_common::bail!("all q, k, v dtypes must match.");
+ diffusion_rs_common::bail!("all q, k, v dtypes must match.");
}
}
@@ -101,7 +101,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
DType::BF16 => SdpaDType::BF16,
DType::F16 => SdpaDType::F16,
DType::F32 => SdpaDType::F32,
- other => diffuse_rs_common::bail!("unsupported sdpa type {other:?}"),
+ other => diffusion_rs_common::bail!("unsupported sdpa type {other:?}"),
};
let command_buffer = q.device().command_buffer()?;
@@ -156,7 +156,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
self.softcapping,
itype,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?;
+ .map_err(diffusion_rs_common::core::Error::wrap)?;
} else {
command_buffer.set_label("vector_attention");
crate::metal_kernels::call_sdpa_vector(
@@ -178,11 +178,11 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
self.softcapping,
itype,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?;
+ .map_err(diffusion_rs_common::core::Error::wrap)?;
}
} else if supports_sdpa_full {
if q_l.dim(2)? != k_l.dim(2)? {
- diffuse_rs_common::bail!(
+ diffusion_rs_common::bail!(
"query and key sequence length must be equal if using full metal sdpa"
)
}
@@ -204,12 +204,12 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa {
self.softcapping,
itype,
)
- .map_err(diffuse_rs_common::core::Error::wrap)?;
+ .map_err(diffusion_rs_common::core::Error::wrap)?;
} else {
- diffuse_rs_common::bail!("must be vector or full sdpa kernel");
+ diffusion_rs_common::bail!("must be vector or full sdpa kernel");
}
- let newstorage = diffuse_rs_common::core::MetalStorage::new(
+ let newstorage = diffusion_rs_common::core::MetalStorage::new(
output,
device.clone(),
elem_count,
@@ -256,7 +256,7 @@ pub fn sdpa(q: &Tensor, k: &Tensor, v: &Tensor, scale: f32, softcapping: f32) ->
att = (att * softcapping as f64)?;
}
- att = diffuse_rs_common::nn::ops::softmax_last_dim(&att)?;
+ att = diffusion_rs_common::nn::ops::softmax_last_dim(&att)?;
att.matmul(v)
}
}
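
In the fallback branch of `sdpa` above, the `att * softcapping` multiply is the tail end of logit softcapping around the attention scores; the full transform is presumably `att = tanh(att / cap) * cap`, with `cap == 1.0` meaning disabled. A scalar sketch of that squashing step, under that assumption:

```rust
/// Sketch of logit softcapping as applied to attention scores
/// (assumed convention: scores are squashed into (-cap, cap) via tanh,
/// and cap == 1.0 disables the transform).
fn softcap_scores(scores: &mut [f32], cap: f32) {
    if cap != 1.0 {
        for s in scores.iter_mut() {
            *s = (*s / cap).tanh() * cap;
        }
    }
}
```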
diff --git a/diffuse_rs_backend/src/unquantized/mod.rs b/diffusion_rs_backend/src/unquantized/mod.rs
similarity index 94%
rename from diffuse_rs_backend/src/unquantized/mod.rs
rename to diffusion_rs_backend/src/unquantized/mod.rs
index fe42448..acef39b 100644
--- a/diffuse_rs_backend/src/unquantized/mod.rs
+++ b/diffusion_rs_backend/src/unquantized/mod.rs
@@ -1,6 +1,6 @@
use std::sync::Arc;
-use diffuse_rs_common::core::{DType, Device, DeviceLocation, Result, Shape, Tensor, D};
+use diffusion_rs_common::core::{DType, Device, DeviceLocation, Result, Shape, Tensor, D};
use crate::{
cublaslt::{maybe_init_cublas_lt_wrapper, CUBLASLT_HANDLE},
@@ -14,7 +14,7 @@ pub struct UnquantLinear {
}
impl QuantMethod for UnquantLinear {
- fn new(method: QuantMethodConfig) -> diffuse_rs_common::core::Result<Self>
+ fn new(method: QuantMethodConfig) -> diffusion_rs_common::core::Result<Self>
where
Self: Sized,
{
diff --git a/diffuse_rs_cli/Cargo.toml b/diffusion_rs_cli/Cargo.toml
similarity index 57%
rename from diffuse_rs_cli/Cargo.toml
rename to diffusion_rs_cli/Cargo.toml
index 3117ab5..a44a244 100644
--- a/diffuse_rs_cli/Cargo.toml
+++ b/diffusion_rs_cli/Cargo.toml
@@ -1,10 +1,10 @@
[package]
-name = "diffuse_rs_cli"
+name = "diffusion_rs_cli"
readme = "README.md"
authors.workspace = true
version.workspace = true
edition.workspace = true
-description = "CLI for diffuse_rs"
+description = "CLI for diffusion_rs"
repository.workspace = true
keywords.workspace = true
categories.workspace = true
@@ -12,7 +12,7 @@ license.workspace = true
homepage.workspace = true
[dependencies]
-diffuse_rs_core = { path = "../diffuse_rs_core" }
+diffusion_rs_core = { path = "../diffusion_rs_core" }
clap.workspace = true
anyhow.workspace = true
tracing.workspace = true
@@ -20,8 +20,8 @@ tracing-subscriber.workspace = true
cliclack.workspace = true
[features]
-cuda = ["diffuse_rs_core/cuda"]
-cudnn = ["diffuse_rs_core/cudnn"]
-metal = ["diffuse_rs_core/metal"]
-accelerate = ["diffuse_rs_core/accelerate"]
-mkl = ["diffuse_rs_core/mkl"]
+cuda = ["diffusion_rs_core/cuda"]
+cudnn = ["diffusion_rs_core/cudnn"]
+metal = ["diffusion_rs_core/metal"]
+accelerate = ["diffusion_rs_core/accelerate"]
+mkl = ["diffusion_rs_core/mkl"]
diff --git a/diffusion_rs_cli/README.md b/diffusion_rs_cli/README.md
new file mode 100644
index 0000000..44b3b31
--- /dev/null
+++ b/diffusion_rs_cli/README.md
@@ -0,0 +1,22 @@
+# `diffusion_rs_cli`
+
+CLI for diffusion-rs.
+
+## Examples
+- FLUX dev:
+```
+diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf
+```
+
+```
+diffusion_rs_cli --scale 3.5 --num-steps 50 model-id -m black-forest-labs/FLUX.1-dev
+```
+
+- FLUX schnell:
+```
+diffusion_rs_cli --scale 0.0 --num-steps 4 dduf -f FLUX.1-schnell-Q8-bnb.dduf
+```
+
+```
+diffusion_rs_cli --scale 0.0 --num-steps 4 model-id -m black-forest-labs/FLUX.1-schnell
+```
\ No newline at end of file
diff --git a/diffuse_rs_cli/src/main.rs b/diffusion_rs_cli/src/main.rs
similarity index 97%
rename from diffuse_rs_cli/src/main.rs
rename to diffusion_rs_cli/src/main.rs
index 432fe13..9b0cb7d 100644
--- a/diffuse_rs_cli/src/main.rs
+++ b/diffusion_rs_cli/src/main.rs
@@ -2,7 +2,9 @@ use cliclack::input;
use std::{path::PathBuf, time::Instant};
use clap::{Parser, Subcommand};
-use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
+use diffusion_rs_core::{
+ DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource,
+};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::EnvFilter;
diff --git a/diffuse_rs_common/Cargo.toml b/diffusion_rs_common/Cargo.toml
similarity index 98%
rename from diffuse_rs_common/Cargo.toml
rename to diffusion_rs_common/Cargo.toml
index 7a3cd87..f774108 100644
--- a/diffuse_rs_common/Cargo.toml
+++ b/diffusion_rs_common/Cargo.toml
@@ -1,5 +1,5 @@
[package]
-name = "diffuse_rs_common"
+name = "diffusion_rs_common"
readme = "README.md"
authors.workspace = true
version.workspace = true
diff --git a/diffusion_rs_common/README.md b/diffusion_rs_common/README.md
new file mode 100644
index 0000000..7649a17
--- /dev/null
+++ b/diffusion_rs_common/README.md
@@ -0,0 +1,3 @@
+# `diffusion_rs_common`
+
+Common functionality for diffusion-rs, including a core ML framework based on Candle, NN functionality, and DDUF loading.
\ No newline at end of file
diff --git a/diffuse_rs_common/build.rs b/diffusion_rs_common/build.rs
similarity index 100%
rename from diffuse_rs_common/build.rs
rename to diffusion_rs_common/build.rs
diff --git a/diffuse_rs_common/src/core/LICENSE b/diffusion_rs_common/src/core/LICENSE
similarity index 100%
rename from diffuse_rs_common/src/core/LICENSE
rename to diffusion_rs_common/src/core/LICENSE
diff --git a/diffuse_rs_common/src/core/accelerate.rs b/diffusion_rs_common/src/core/accelerate.rs
similarity index 100%
rename from diffuse_rs_common/src/core/accelerate.rs
rename to diffusion_rs_common/src/core/accelerate.rs
diff --git a/diffuse_rs_common/src/core/backend.rs b/diffusion_rs_common/src/core/backend.rs
similarity index 100%
rename from diffuse_rs_common/src/core/backend.rs
rename to diffusion_rs_common/src/core/backend.rs
diff --git a/diffuse_rs_common/src/core/backprop.rs b/diffusion_rs_common/src/core/backprop.rs
similarity index 100%
rename from diffuse_rs_common/src/core/backprop.rs
rename to diffusion_rs_common/src/core/backprop.rs
diff --git a/diffuse_rs_common/src/core/conv.rs b/diffusion_rs_common/src/core/conv.rs
similarity index 100%
rename from diffuse_rs_common/src/core/conv.rs
rename to diffusion_rs_common/src/core/conv.rs
diff --git a/diffuse_rs_common/src/core/convert.rs b/diffusion_rs_common/src/core/convert.rs
similarity index 100%
rename from diffuse_rs_common/src/core/convert.rs
rename to diffusion_rs_common/src/core/convert.rs
diff --git a/diffuse_rs_common/src/core/cpu/avx.rs b/diffusion_rs_common/src/core/cpu/avx.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu/avx.rs
rename to diffusion_rs_common/src/core/cpu/avx.rs
diff --git a/diffuse_rs_common/src/core/cpu/erf.rs b/diffusion_rs_common/src/core/cpu/erf.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu/erf.rs
rename to diffusion_rs_common/src/core/cpu/erf.rs
diff --git a/diffuse_rs_common/src/core/cpu/kernels.rs b/diffusion_rs_common/src/core/cpu/kernels.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu/kernels.rs
rename to diffusion_rs_common/src/core/cpu/kernels.rs
diff --git a/diffuse_rs_common/src/core/cpu/mod.rs b/diffusion_rs_common/src/core/cpu/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu/mod.rs
rename to diffusion_rs_common/src/core/cpu/mod.rs
diff --git a/diffuse_rs_common/src/core/cpu/neon.rs b/diffusion_rs_common/src/core/cpu/neon.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu/neon.rs
rename to diffusion_rs_common/src/core/cpu/neon.rs
diff --git a/diffuse_rs_common/src/core/cpu/simd128.rs b/diffusion_rs_common/src/core/cpu/simd128.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu/simd128.rs
rename to diffusion_rs_common/src/core/cpu/simd128.rs
diff --git a/diffuse_rs_common/src/core/cpu_backend/mod.rs b/diffusion_rs_common/src/core/cpu_backend/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu_backend/mod.rs
rename to diffusion_rs_common/src/core/cpu_backend/mod.rs
diff --git a/diffuse_rs_common/src/core/cpu_backend/utils.rs b/diffusion_rs_common/src/core/cpu_backend/utils.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cpu_backend/utils.rs
rename to diffusion_rs_common/src/core/cpu_backend/utils.rs
diff --git a/diffuse_rs_common/src/core/cuda_backend/cudnn.rs b/diffusion_rs_common/src/core/cuda_backend/cudnn.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cuda_backend/cudnn.rs
rename to diffusion_rs_common/src/core/cuda_backend/cudnn.rs
diff --git a/diffuse_rs_common/src/core/cuda_backend/device.rs b/diffusion_rs_common/src/core/cuda_backend/device.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cuda_backend/device.rs
rename to diffusion_rs_common/src/core/cuda_backend/device.rs
diff --git a/diffuse_rs_common/src/core/cuda_backend/error.rs b/diffusion_rs_common/src/core/cuda_backend/error.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cuda_backend/error.rs
rename to diffusion_rs_common/src/core/cuda_backend/error.rs
diff --git a/diffuse_rs_common/src/core/cuda_backend/mod.rs b/diffusion_rs_common/src/core/cuda_backend/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cuda_backend/mod.rs
rename to diffusion_rs_common/src/core/cuda_backend/mod.rs
diff --git a/diffuse_rs_common/src/core/cuda_backend/utils.rs b/diffusion_rs_common/src/core/cuda_backend/utils.rs
similarity index 100%
rename from diffuse_rs_common/src/core/cuda_backend/utils.rs
rename to diffusion_rs_common/src/core/cuda_backend/utils.rs
diff --git a/diffuse_rs_common/src/core/custom_op.rs b/diffusion_rs_common/src/core/custom_op.rs
similarity index 100%
rename from diffuse_rs_common/src/core/custom_op.rs
rename to diffusion_rs_common/src/core/custom_op.rs
diff --git a/diffuse_rs_common/src/core/device.rs b/diffusion_rs_common/src/core/device.rs
similarity index 100%
rename from diffuse_rs_common/src/core/device.rs
rename to diffusion_rs_common/src/core/device.rs
diff --git a/diffuse_rs_common/src/core/display.rs b/diffusion_rs_common/src/core/display.rs
similarity index 100%
rename from diffuse_rs_common/src/core/display.rs
rename to diffusion_rs_common/src/core/display.rs
diff --git a/diffuse_rs_common/src/core/dtype.rs b/diffusion_rs_common/src/core/dtype.rs
similarity index 100%
rename from diffuse_rs_common/src/core/dtype.rs
rename to diffusion_rs_common/src/core/dtype.rs
diff --git a/diffuse_rs_common/src/core/dummy_cuda_backend.rs b/diffusion_rs_common/src/core/dummy_cuda_backend.rs
similarity index 100%
rename from diffuse_rs_common/src/core/dummy_cuda_backend.rs
rename to diffusion_rs_common/src/core/dummy_cuda_backend.rs
diff --git a/diffuse_rs_common/src/core/dummy_metal_backend.rs b/diffusion_rs_common/src/core/dummy_metal_backend.rs
similarity index 100%
rename from diffuse_rs_common/src/core/dummy_metal_backend.rs
rename to diffusion_rs_common/src/core/dummy_metal_backend.rs
diff --git a/diffuse_rs_common/src/core/error.rs b/diffusion_rs_common/src/core/error.rs
similarity index 100%
rename from diffuse_rs_common/src/core/error.rs
rename to diffusion_rs_common/src/core/error.rs
diff --git a/diffuse_rs_common/src/core/indexer.rs b/diffusion_rs_common/src/core/indexer.rs
similarity index 93%
rename from diffuse_rs_common/src/core/indexer.rs
rename to diffusion_rs_common/src/core/indexer.rs
index 3215b42..b4ee963 100644
--- a/diffuse_rs_common/src/core/indexer.rs
+++ b/diffusion_rs_common/src/core/indexer.rs
@@ -7,7 +7,7 @@ impl Tensor {
/// Intended to be used by the trait `.i()`
///
/// ```
- /// # use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp};
+ /// # use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
///
/// let c = a.i(0..1)?;
@@ -22,7 +22,7 @@ impl Tensor {
/// let c = a.i((.., ..=2))?;
/// assert_eq!(c.shape().dims(), &[2, 3]);
///
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
fn index(&self, indexers: &[TensorIndexer]) -> Result<Self> {
let mut x = self.clone();
@@ -142,7 +142,7 @@ where
T: Into<TensorIndexer>,
{
///```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp};
/// let a = Tensor::new(&[
/// [0., 1.],
/// [2., 3.],
@@ -166,7 +166,7 @@ where
/// [2., 3.],
/// [4., 5.]
/// ]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
fn i(&self, index: T) -> Result<Tensor> {
self.index(&[index.into()])
@@ -178,7 +178,7 @@ where
A: Into<TensorIndexer>,
{
///```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp};
/// let a = Tensor::new(&[
/// [0f32, 1.],
/// [2. , 3.],
@@ -202,7 +202,7 @@ where
/// [2., 3.],
/// [4., 5.]
/// ]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
fn i(&self, (a,): (A,)) -> Result<Tensor> {
self.index(&[a.into()])
@@ -215,7 +215,7 @@ where
B: Into<TensorIndexer>,
{
///```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp};
/// let a = Tensor::new(&[[0f32, 1., 2.], [3., 4., 5.], [6., 7., 8.]], &Device::Cpu)?;
///
/// let b = a.i((1, 0))?;
@@ -228,7 +228,7 @@ where
/// let d = a.i((2.., ..))?;
/// assert_eq!(c.shape().dims(), &[2]);
/// assert_eq!(c.to_vec1::<f32>()?, &[1., 4.]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
fn i(&self, (a, b): (A, B)) -> Result<Tensor> {
self.index(&[a.into(), b.into()])
diff --git a/diffuse_rs_common/src/core/layout.rs b/diffusion_rs_common/src/core/layout.rs
similarity index 100%
rename from diffuse_rs_common/src/core/layout.rs
rename to diffusion_rs_common/src/core/layout.rs
diff --git a/diffuse_rs_common/src/core/metal_backend/device.rs b/diffusion_rs_common/src/core/metal_backend/device.rs
similarity index 100%
rename from diffuse_rs_common/src/core/metal_backend/device.rs
rename to diffusion_rs_common/src/core/metal_backend/device.rs
diff --git a/diffuse_rs_common/src/core/metal_backend/mod.rs b/diffusion_rs_common/src/core/metal_backend/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/core/metal_backend/mod.rs
rename to diffusion_rs_common/src/core/metal_backend/mod.rs
diff --git a/diffuse_rs_common/src/core/mkl.rs b/diffusion_rs_common/src/core/mkl.rs
similarity index 100%
rename from diffuse_rs_common/src/core/mkl.rs
rename to diffusion_rs_common/src/core/mkl.rs
diff --git a/diffuse_rs_common/src/core/mod.rs b/diffusion_rs_common/src/core/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/core/mod.rs
rename to diffusion_rs_common/src/core/mod.rs
diff --git a/diffuse_rs_common/src/core/npy.rs b/diffusion_rs_common/src/core/npy.rs
similarity index 100%
rename from diffuse_rs_common/src/core/npy.rs
rename to diffusion_rs_common/src/core/npy.rs
diff --git a/diffuse_rs_common/src/core/op.rs b/diffusion_rs_common/src/core/op.rs
similarity index 100%
rename from diffuse_rs_common/src/core/op.rs
rename to diffusion_rs_common/src/core/op.rs
diff --git a/diffuse_rs_common/src/core/pickle.rs b/diffusion_rs_common/src/core/pickle.rs
similarity index 100%
rename from diffuse_rs_common/src/core/pickle.rs
rename to diffusion_rs_common/src/core/pickle.rs
diff --git a/diffuse_rs_common/src/core/quantized/avx.rs b/diffusion_rs_common/src/core/quantized/avx.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/avx.rs
rename to diffusion_rs_common/src/core/quantized/avx.rs
diff --git a/diffuse_rs_common/src/core/quantized/cuda.rs b/diffusion_rs_common/src/core/quantized/cuda.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/cuda.rs
rename to diffusion_rs_common/src/core/quantized/cuda.rs
diff --git a/diffuse_rs_common/src/core/quantized/dummy_cuda.rs b/diffusion_rs_common/src/core/quantized/dummy_cuda.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/dummy_cuda.rs
rename to diffusion_rs_common/src/core/quantized/dummy_cuda.rs
diff --git a/diffuse_rs_common/src/core/quantized/dummy_metal.rs b/diffusion_rs_common/src/core/quantized/dummy_metal.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/dummy_metal.rs
rename to diffusion_rs_common/src/core/quantized/dummy_metal.rs
diff --git a/diffuse_rs_common/src/core/quantized/ggml_file.rs b/diffusion_rs_common/src/core/quantized/ggml_file.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/ggml_file.rs
rename to diffusion_rs_common/src/core/quantized/ggml_file.rs
diff --git a/diffuse_rs_common/src/core/quantized/gguf_file.rs b/diffusion_rs_common/src/core/quantized/gguf_file.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/gguf_file.rs
rename to diffusion_rs_common/src/core/quantized/gguf_file.rs
diff --git a/diffuse_rs_common/src/core/quantized/imatrix_file.rs b/diffusion_rs_common/src/core/quantized/imatrix_file.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/imatrix_file.rs
rename to diffusion_rs_common/src/core/quantized/imatrix_file.rs
diff --git a/diffuse_rs_common/src/core/quantized/k_quants.rs b/diffusion_rs_common/src/core/quantized/k_quants.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/k_quants.rs
rename to diffusion_rs_common/src/core/quantized/k_quants.rs
diff --git a/diffuse_rs_common/src/core/quantized/metal.rs b/diffusion_rs_common/src/core/quantized/metal.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/metal.rs
rename to diffusion_rs_common/src/core/quantized/metal.rs
diff --git a/diffuse_rs_common/src/core/quantized/mod.rs b/diffusion_rs_common/src/core/quantized/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/mod.rs
rename to diffusion_rs_common/src/core/quantized/mod.rs
diff --git a/diffuse_rs_common/src/core/quantized/neon.rs b/diffusion_rs_common/src/core/quantized/neon.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/neon.rs
rename to diffusion_rs_common/src/core/quantized/neon.rs
diff --git a/diffuse_rs_common/src/core/quantized/simd128.rs b/diffusion_rs_common/src/core/quantized/simd128.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/simd128.rs
rename to diffusion_rs_common/src/core/quantized/simd128.rs
diff --git a/diffuse_rs_common/src/core/quantized/utils.rs b/diffusion_rs_common/src/core/quantized/utils.rs
similarity index 100%
rename from diffuse_rs_common/src/core/quantized/utils.rs
rename to diffusion_rs_common/src/core/quantized/utils.rs
diff --git a/diffuse_rs_common/src/core/safetensors.rs b/diffusion_rs_common/src/core/safetensors.rs
similarity index 100%
rename from diffuse_rs_common/src/core/safetensors.rs
rename to diffusion_rs_common/src/core/safetensors.rs
diff --git a/diffuse_rs_common/src/core/scalar.rs b/diffusion_rs_common/src/core/scalar.rs
similarity index 100%
rename from diffuse_rs_common/src/core/scalar.rs
rename to diffusion_rs_common/src/core/scalar.rs
diff --git a/diffuse_rs_common/src/core/shape.rs b/diffusion_rs_common/src/core/shape.rs
similarity index 100%
rename from diffuse_rs_common/src/core/shape.rs
rename to diffusion_rs_common/src/core/shape.rs
diff --git a/diffuse_rs_common/src/core/sort.rs b/diffusion_rs_common/src/core/sort.rs
similarity index 100%
rename from diffuse_rs_common/src/core/sort.rs
rename to diffusion_rs_common/src/core/sort.rs
diff --git a/diffuse_rs_common/src/core/storage.rs b/diffusion_rs_common/src/core/storage.rs
similarity index 100%
rename from diffuse_rs_common/src/core/storage.rs
rename to diffusion_rs_common/src/core/storage.rs
diff --git a/diffuse_rs_common/src/core/streaming.rs b/diffusion_rs_common/src/core/streaming.rs
similarity index 100%
rename from diffuse_rs_common/src/core/streaming.rs
rename to diffusion_rs_common/src/core/streaming.rs
diff --git a/diffuse_rs_common/src/core/strided_index.rs b/diffusion_rs_common/src/core/strided_index.rs
similarity index 100%
rename from diffuse_rs_common/src/core/strided_index.rs
rename to diffusion_rs_common/src/core/strided_index.rs
diff --git a/diffuse_rs_common/src/core/tensor.rs b/diffusion_rs_common/src/core/tensor.rs
similarity index 96%
rename from diffuse_rs_common/src/core/tensor.rs
rename to diffusion_rs_common/src/core/tensor.rs
index ccdd1dd..c2dda1a 100644
--- a/diffuse_rs_common/src/core/tensor.rs
+++ b/diffusion_rs_common/src/core/tensor.rs
@@ -56,13 +56,13 @@ impl AsRef for Tensor {
/// The core struct for manipulating tensors.
///
/// ```rust
-/// use diffuse_rs_common::core::{Tensor, DType, Device};
+/// use diffusion_rs_common::core::{Tensor, DType, Device};
///
/// let a = Tensor::arange(0f32, 6f32, &Device::Cpu)?.reshape((2, 3))?;
/// let b = Tensor::arange(0f32, 12f32, &Device::Cpu)?.reshape((3, 4))?;
///
/// let c = a.matmul(&b)?;
-/// # Ok::<(), diffuse_rs_common::core::Error>(())
+/// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
///
/// Tensors are reference counted with [`Arc`] so cloning them is cheap.
@@ -209,11 +209,11 @@ impl Tensor {
/// Creates a new tensor filled with ones.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::ones((2, 3), DType::F32, &Device::Cpu)?;
/// let b = Tensor::from_slice(&[1.0f32, 1.0, 1.0, 1.0, 1.0, 1.0], (2, 3), &Device::Cpu)?;
/// // a == b
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn ones<S: Into<Shape>>(shape: S, dtype: DType, device: &Device) -> Result<Self> {
Self::ones_impl(shape, dtype, device, false)
@@ -222,11 +222,11 @@ impl Tensor {
/// Creates a new tensor filled with ones with same shape, dtype, and device as the other tensor.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
/// let b = a.ones_like()?;
/// // b == a + 1
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn ones_like(&self) -> Result<Self> {
Tensor::ones(self.shape(), self.dtype(), self.device())
@@ -249,11 +249,11 @@ impl Tensor {
/// Creates a new tensor filled with zeros.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
/// let b = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 0.0, 0.0, 0.0], (2, 3), &Device::Cpu)?;
/// // a == b
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn zeros<S: Into<Shape>>(shape: S, dtype: DType, device: &Device) -> Result<Self> {
Self::zeros_impl(shape, dtype, device, false)
@@ -263,11 +263,11 @@ impl Tensor {
/// tensor.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
/// let b = a.zeros_like()?;
/// // b is on CPU f32.
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn zeros_like(&self) -> Result<Self> {
Tensor::zeros(self.shape(), self.dtype(), self.device())
@@ -293,10 +293,10 @@ impl Tensor {
/// This returns uninitialized memory.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = unsafe { Tensor::empty((2, 3), DType::F32, &Device::Cpu)? };
/// // a == b
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub unsafe fn empty<S: Into<Shape>>(shape: S, dtype: DType, device: &Device) -> Result<Self> {
Self::empty_impl(shape, dtype, device, false)
@@ -309,10 +309,10 @@ impl Tensor {
/// This returns uninitialized memory.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
/// let b = unsafe { a.empty_like()? };
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub unsafe fn empty_like(&self) -> Result<Self> {
Tensor::empty(self.shape(), self.dtype(), self.device())
@@ -433,14 +433,14 @@ impl Tensor {
/// Returns a new tensor with all the elements having the same specified value. Note that
/// the tensor is not contiguous so you would have to call `.contiguous()` on it if needed.
///```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::full(3.5, (2, 4), &Device::Cpu)?;
///
/// assert_eq!(a.to_vec2::<f64>()?, &[
/// [3.5, 3.5, 3.5, 3.5],
/// [3.5, 3.5, 3.5, 3.5],
/// ]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn full<D: crate::core::WithDType, S: Into<Shape>>(
value: D,
shape: S,
@@ -451,11 +451,11 @@ impl Tensor {
/// Creates a new 1D tensor from an iterator.
///```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::from_iter( [1.0, 2.0, 3.0, 4.0].into_iter(), &Device::Cpu)?;
///
/// assert_eq!(a.to_vec1::<f64>()?, &[1.0, 2.0, 3.0, 4.0]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn from_iter<D: crate::core::WithDType>(
iter: impl IntoIterator<Item = D>,
@@ -469,11 +469,11 @@ impl Tensor {
/// Creates a new 1D tensor with values from the interval `[start, end)` taken with a common
/// difference `1` from `start`.
///```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::arange(2., 5., &Device::Cpu)?;
///
/// assert_eq!(a.to_vec1::<f64>()?, &[2., 3., 4.]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn arange<D: crate::core::WithDType>(start: D, end: D, device: &Device) -> Result<Self> {
Self::arange_step(start, end, D::one(), device)
@@ -482,11 +482,11 @@ impl Tensor {
/// Creates a new 1D tensor with values from the interval `[start, end)` taken with a common
/// difference `step` from `start`.
///```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::arange_step(2.0, 4.0, 0.5, &Device::Cpu)?;
///
/// assert_eq!(a.to_vec1::<f64>()?, &[2.0, 2.5, 3.0, 3.5]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn arange_step<D: crate::core::WithDType>(
start: D,
@@ -534,14 +534,14 @@ impl Tensor {
/// in this vector must be the same as the number of elements defined by the shape.
/// If the device is cpu, no data copy is made.
///```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::from_vec(vec!{1., 2., 3., 4., 5., 6.}, (2, 3), &Device::Cpu)?;
///
/// assert_eq!(a.to_vec2::<f64>()?, &[
/// [1., 2., 3.],
/// [4., 5., 6.]
/// ]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn from_vec<S: Into<Shape>, D: crate::core::WithDType>(
data: Vec<D>,
@@ -554,7 +554,7 @@ impl Tensor {
/// Creates a new tensor initialized with values from the input slice. The number of elements
/// in this slice must be the same as the number of elements defined by the shape.
///```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let values = vec![1., 2., 3., 4., 5., 6., 7., 8.];
/// let a = Tensor::from_slice(&values[1..7], (2, 3), &Device::Cpu)?;
///
@@ -562,7 +562,7 @@ impl Tensor {
/// [2., 3., 4.],
/// [5., 6., 7.]
/// ]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn from_slice<S: Into<Shape>, D: crate::core::WithDType>(
array: &[D],
@@ -710,7 +710,7 @@ impl Tensor {
/// # Examples
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device, Shape};
+ /// use diffusion_rs_common::core::{Tensor, Device, Shape};
/// let x = Tensor::new(&[1f32, 2., 3.], &Device::Cpu)?;
/// let y = Tensor::new(&[4f32, 5., 6.], &Device::Cpu)?;
///
@@ -726,7 +726,7 @@ impl Tensor {
///
/// assert_eq!(grids_ij[0].to_vec2::<f32>()?, &[[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]);
/// assert_eq!(grids_ij[1].to_vec2::<f32>()?, &[[4., 5., 6.], [4., 5., 6.], [4., 5., 6.]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
///
/// # Errors
@@ -769,11 +769,11 @@ impl Tensor {
/// be performed.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::new(&[[0f32, 1.], [2., 3.]], &Device::Cpu)?;
/// let a = a.affine(4., -2.)?;
/// assert_eq!(a.to_vec2::<f32>()?, &[[-2.0, 2.0], [6.0, 10.0]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn affine(&self, mul: f64, add: f64) -> Result<Self> {
if self.elem_count() == 0 {
@@ -846,7 +846,7 @@ impl Tensor {
/// Returns a new tensor that is a narrowed version of the input, the dimension `dim`
/// ranges from `start` to `start + len`.
/// ```
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::new(&[
/// [0f32, 1., 2.],
/// [3. , 4., 5.],
@@ -867,7 +867,7 @@ impl Tensor {
/// [4.],
/// [7.]
/// ]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn narrow<D: Dim>(&self, dim: D, start: usize, len: usize) -> Result<Self> {
let dims = self.dims();
@@ -971,14 +971,14 @@ impl Tensor {
/// Elements that are shifted beyond the last position are re-introduced at the first position.
///
/// ```rust
- /// # use diffuse_rs_common::core::{Tensor, Device};
+ /// # use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let tensor = tensor.roll(1, 0)?;
/// assert_eq!(tensor.to_vec2::<f32>()?, &[[4., 5.], [0., 1.], [2., 3.]]);
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let tensor = tensor.roll(-1, 0)?;
/// assert_eq!(tensor.to_vec2::<f32>()?, &[[2., 3.], [4., 5.], [0., 1.]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn roll<D>(&self, shift: i32, dim: D) -> Result<Self>
where
@@ -1003,7 +1003,7 @@ impl Tensor {
/// that the number of elements for each dimension index in `sum_dims` is 1.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::new(&[[0f32, 1.], [2., 3.]], &Device::Cpu)?;
/// let s = a.sum_keepdim(0)?;
/// assert_eq!(s.to_vec2::<f32>()?, &[[2., 4.]]);
@@ -1011,7 +1011,7 @@ impl Tensor {
/// assert_eq!(s.to_vec2::<f32>()?, &[[1.], [5.]]);
/// let s = a.sum_keepdim((0, 1))?;
/// assert_eq!(s.to_vec2::<f32>()?, &[[6.]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn sum_keepdim<D: Dims>(&self, sum_dims: D) -> Result<Self> {
self.sum_impl(sum_dims, true)
@@ -1031,7 +1031,7 @@ impl Tensor {
/// that the number of elements for each dimension index in `mean_dims` is 1.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let a = Tensor::new(&[[0f32, 1.], [2., 3.]], &Device::Cpu)?;
/// let s = a.mean_keepdim(0)?;
/// assert_eq!(s.to_vec2::<f32>()?, &[[1., 2.]]);
@@ -1039,7 +1039,7 @@ impl Tensor {
/// assert_eq!(s.to_vec2::<f32>()?, &[[0.5], [2.5]]);
/// let s = a.mean_keepdim((0, 1))?;
/// assert_eq!(s.to_vec2::<f32>()?, &[[1.5]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn mean_keepdim<D: Dims>(&self, mean_dims: D) -> Result<Self> {
let mean_dims = mean_dims.to_indexes(self.shape(), "mean-keepdim")?;
@@ -1559,12 +1559,12 @@ impl Tensor {
/// vocabulary size, and `h` the hidden size.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let values = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let ids = Tensor::new(&[2u32, 1u32, 2u32], &Device::Cpu)?;
/// let emb = values.embedding(&ids)?;
/// assert_eq!(emb.to_vec2::<f32>()?, &[[4., 5.], [2., 3.], [4., 5.]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn embedding(&self, ids: &Self) -> Result<Self> {
if self.rank() != 2 || ids.rank() != 1 {
@@ -1754,11 +1754,11 @@ impl Tensor {
/// scalar with zero dimensions.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let tensor = tensor.sum_all()?;
/// assert_eq!(tensor.to_scalar::<f32>()?, 15.);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn sum_all(&self) -> Result<Tensor> {
let dims: Vec<_> = (0..self.rank()).collect();
@@ -1819,11 +1819,11 @@ impl Tensor {
/// Flattens the input tensor by reshaping it into a one dimension tensor.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let tensor = tensor.flatten_all()?;
/// assert_eq!(tensor.to_vec1::<f32>()?, &[0., 1., 2., 3., 4., 5.]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn flatten_all(&self) -> Result<Tensor> {
self.flatten_(None::<usize>, None::<usize>)
@@ -1832,13 +1832,13 @@ impl Tensor {
/// Returns the sub-tensor fixing the index at `i` on the first dimension.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let t = tensor.get(0)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[0., 1.]);
/// let t = tensor.get(1)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[2., 3.]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn get(&self, i: usize) -> Result<Tensor> {
let dims = self.dims();
@@ -1852,7 +1852,7 @@ impl Tensor {
/// Returns the sub-tensor fixing the index at `index` on the dimension `dim`.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let t = tensor.get_on_dim(1, 0)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[0., 2., 4.]);
@@ -1860,7 +1860,7 @@ impl Tensor {
/// assert_eq!(t.to_vec1::<f32>()?, &[1., 3., 5.]);
/// let t = tensor.get_on_dim(0, 1)?;
/// assert_eq!(t.to_vec1::<f32>()?, &[2., 3.]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn get_on_dim<D: Dim>(&self, dim: D, index: usize) -> Result<Tensor> {
let dim = dim.to_index(self.shape(), "get_on_dim")?;
@@ -1871,11 +1871,11 @@ impl Tensor {
/// input are swapped.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?;
/// let tensor = tensor.t()?;
/// assert_eq!(tensor.to_vec2::<f32>()?, &[[0.0, 2.0, 4.0], [1.0, 3.0, 5.0]]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn t(&self) -> Result<Tensor> {
let rank = self.rank();
@@ -1915,12 +1915,12 @@ impl Tensor {
/// dims must be a permutation, i.e. include each dimension index exactly once.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::arange(0u32, 120u32, &Device::Cpu)?.reshape((2, 3, 4, 5))?;
/// assert_eq!(tensor.dims(), &[2, 3, 4, 5]);
/// let tensor = tensor.permute((2, 3, 1, 0))?;
/// assert_eq!(tensor.dims(), &[4, 5, 3, 2]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn permute<D: Dims>(&self, dims: D) -> Result<Tensor> {
let dims = dims.to_indexes(self.shape(), "permute")?;
@@ -2074,12 +2074,12 @@ impl Tensor {
/// Casts the input tensor to the target `dtype`.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, Device};
+ /// use diffusion_rs_common::core::{Tensor, Device};
/// let tensor = Tensor::new(3.14159265358979f64, &Device::Cpu)?;
/// assert_eq!(tensor.to_scalar::<f64>()?, 3.14159265358979);
- /// let tensor = tensor.to_dtype(diffuse_rs_common::core::DType::F32)?;
+ /// let tensor = tensor.to_dtype(diffusion_rs_common::core::DType::F32)?;
/// assert_eq!(tensor.to_scalar::<f32>()?, 3.1415927);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn to_dtype(&self, dtype: DType) -> Result<Self> {
if self.dtype() == dtype {
@@ -2137,7 +2137,7 @@ impl Tensor {
/// as to match the number of elements in the tensor.
///
/// ```rust
- /// # use diffuse_rs_common::core::{Tensor, DType, Device, D};
+ /// # use diffusion_rs_common::core::{Tensor, DType, Device, D};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
///
/// let c = a.reshape((1, 6))?;
@@ -2149,7 +2149,7 @@ impl Tensor {
/// let c = a.reshape((2, (), 1))?;
/// assert_eq!(c.shape().dims(), &[2, 3, 1]);
///
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn reshape<S: crate::core::shape::ShapeWithOneHole>(&self, s: S) -> Result<Tensor> {
let shape = s.into_shape(self.elem_count())?;
@@ -2184,7 +2184,7 @@ impl Tensor {
/// Creates a new tensor with the specified dimension removed if its size was one.
///
/// ```rust
- /// # use diffuse_rs_common::core::{Tensor, DType, Device, D};
+ /// # use diffusion_rs_common::core::{Tensor, DType, Device, D};
/// let a = Tensor::zeros((2, 3, 1), DType::F32, &Device::Cpu)?;
///
/// let c = a.squeeze(2)?;
@@ -2192,7 +2192,7 @@ impl Tensor {
///
/// let c = a.squeeze(D::Minus1)?;
/// assert_eq!(c.shape().dims(), &[2, 3]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn squeeze<D: Dim>(&self, dim: D) -> Result<Self> {
// The PyTorch semantics are to return the same tensor if the target dimension
@@ -2222,7 +2222,7 @@ impl Tensor {
/// Creates a new tensor with a dimension of size one inserted at the specified position.
///
/// ```rust
- /// # use diffuse_rs_common::core::{Tensor, DType, Device, D};
+ /// # use diffusion_rs_common::core::{Tensor, DType, Device, D};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
///
/// let c = a.unsqueeze(0)?;
@@ -2230,7 +2230,7 @@ impl Tensor {
///
/// let c = a.unsqueeze(D::Minus1)?;
/// assert_eq!(c.shape().dims(), &[2, 3, 1]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn unsqueeze<D: Dim>(&self, dim: D) -> Result<Self> {
let mut dims = self.dims().to_vec();
@@ -2259,7 +2259,7 @@ impl Tensor {
/// All tensors must have the same rank, and the output has one additional rank.
///
/// ```rust
- /// # use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// # use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
/// let b = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
///
@@ -2268,7 +2268,7 @@ impl Tensor {
///
/// let c = Tensor::stack(&[&a, &b], 2)?;
/// assert_eq!(c.shape().dims(), &[2, 3, 2]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn stack<A: AsRef<Tensor>, D: Dim>(args: &[A], dim: D) -> Result<Self> {
if args.is_empty() {
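
Note: for downstream users this patch is purely mechanical — only the crate prefix changes, never an API. A minimal migration sketch, using only calls that appear in the doc-tests above (the `main`-based harness is illustrative, not part of the patch):

```rust
// Before this patch:
// use diffuse_rs_common::core::{DType, Device, Tensor};
// After this patch, only the import path changes:
use diffusion_rs_common::core::{DType, Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
    let b = a.ones_like()?;
    // A (2, 3) tensor of ones sums to 6.
    assert_eq!(b.sum_all()?.to_scalar::<f32>()?, 6.0);
    Ok(())
}
```
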
diff --git a/diffuse_rs_common/src/core/tensor_cat.rs b/diffusion_rs_common/src/core/tensor_cat.rs
similarity index 98%
rename from diffuse_rs_common/src/core/tensor_cat.rs
rename to diffusion_rs_common/src/core/tensor_cat.rs
index 7f99c2c..dee402d 100644
--- a/diffuse_rs_common/src/core/tensor_cat.rs
+++ b/diffusion_rs_common/src/core/tensor_cat.rs
@@ -7,7 +7,7 @@ impl Tensor {
/// the same rank
///
/// ```rust
- /// # use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// # use diffusion_rs_common::core::{Tensor, DType, Device};
/// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
/// let b = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
///
@@ -16,7 +16,7 @@ impl Tensor {
///
/// let c = Tensor::cat(&[&a, &b], 1)?;
/// assert_eq!(c.shape().dims(), &[2, 6]);
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn cat<A: AsRef<Tensor>, D: Dim>(args: &[A], dim: D) -> Result<Self> {
if args.is_empty() {
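
`Tensor::cat` above is the rank-preserving counterpart of `Tensor::stack` from the previous file. A short sketch of the difference, reusing the doc-test setup (illustrative only, not part of the patch):

```rust
use diffusion_rs_common::core::{DType, Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
    let b = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?;
    // cat keeps the rank: (2, 3) ++ (2, 3) along dim 0 -> (4, 3).
    assert_eq!(Tensor::cat(&[&a, &b], 0)?.dims(), &[4, 3]);
    // stack inserts a new dim: two (2, 3) tensors stacked at dim 0 -> (2, 2, 3).
    assert_eq!(Tensor::stack(&[&a, &b], 0)?.dims(), &[2, 2, 3]);
    Ok(())
}
```
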
diff --git a/diffuse_rs_common/src/core/tensor_indexing.rs b/diffusion_rs_common/src/core/tensor_indexing.rs
similarity index 99%
rename from diffuse_rs_common/src/core/tensor_indexing.rs
rename to diffusion_rs_common/src/core/tensor_indexing.rs
index a5aebec..58f37a5 100644
--- a/diffuse_rs_common/src/core/tensor_indexing.rs
+++ b/diffusion_rs_common/src/core/tensor_indexing.rs
@@ -72,7 +72,7 @@ impl Tensor {
///
/// # Example
/// ```rust
- /// use diffuse_rs_common::core::{Device, Tensor};
+ /// use diffusion_rs_common::core::{Device, Tensor};
///
/// let dev = Device::Cpu;
/// let tensor = Tensor::arange(0u32, 4 * 5, &dev)?.reshape((4, 5))?;
@@ -87,7 +87,7 @@ impl Tensor {
/// [15, 16, 17, 18, 19]
/// ]
/// );
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn slice_assign(&self, ranges: &[&dyn RangeBound], src: &Tensor) -> Result<Self> {
let src_dims = src.dims();
diff --git a/diffuse_rs_common/src/core/test_utils.rs b/diffusion_rs_common/src/core/test_utils.rs
similarity index 100%
rename from diffuse_rs_common/src/core/test_utils.rs
rename to diffusion_rs_common/src/core/test_utils.rs
diff --git a/diffuse_rs_common/src/core/tests/conv_tests.rs b/diffusion_rs_common/src/core/tests/conv_tests.rs
similarity index 99%
rename from diffuse_rs_common/src/core/tests/conv_tests.rs
rename to diffusion_rs_common/src/core/tests/conv_tests.rs
index 3b5413f..fe216e2 100644
--- a/diffuse_rs_common/src/core/tests/conv_tests.rs
+++ b/diffusion_rs_common/src/core/tests/conv_tests.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use diffuse_rs_common::core::{test_device, test_utils, Device, IndexOp, Tensor};
+use diffusion_rs_common::core::{test_device, test_utils, Device, IndexOp, Tensor};
/* This test is based on the following script.
import torch
@@ -385,7 +385,7 @@ print(w.grad[0])
*/
fn conv2d_grad(dev: &Device) -> Result<()> {
// conv-transposes are not implemented for metal
- use diffuse_rs_common::core::Var;
+ use diffusion_rs_common::core::Var;
let t = Var::from_slice(
&[
0.4056f32, -0.8689, -0.0773, -1.5630, -2.8012, -1.5059, 0.3972, 1.0852, 0.4997, 3.0616,
diff --git a/diffuse_rs_common/src/core/tests/custom_op_tests.rs b/diffusion_rs_common/src/core/tests/custom_op_tests.rs
similarity index 83%
rename from diffuse_rs_common/src/core/tests/custom_op_tests.rs
rename to diffusion_rs_common/src/core/tests/custom_op_tests.rs
index 5712cd0..6f1ef63 100644
--- a/diffuse_rs_common/src/core/tests/custom_op_tests.rs
+++ b/diffusion_rs_common/src/core/tests/custom_op_tests.rs
@@ -1,7 +1,7 @@
-use diffuse_rs_common::core::backend::BackendStorage;
-use diffuse_rs_common::core::cpu_backend;
-use diffuse_rs_common::core::test_utils::to_vec1_round;
-use diffuse_rs_common::core::{CpuStorage, CustomOp1, DType, Device, Error, Layout, Result, Shape, Tensor};
+use diffusion_rs_common::core::backend::BackendStorage;
+use diffusion_rs_common::core::cpu_backend;
+use diffusion_rs_common::core::test_utils::to_vec1_round;
+use diffusion_rs_common::core::{CpuStorage, CustomOp1, DType, Device, Error, Layout, Result, Shape, Tensor};
fn fwd<T: num_traits::Float>(v: T, alpha: f64) -> T {
if v.is_sign_positive() {
@@ -22,7 +22,7 @@ impl CustomOp1 for Elu {
}
fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)> {
- let storage = diffuse_rs_common::core::map_dtype!(
+ let storage = diffusion_rs_common::core::map_dtype!(
"elu",
s,
|s| cpu_backend::unary_map(s, l, |v| fwd(v, self.alpha)),
@@ -65,7 +65,7 @@ impl CustomOp1 for EluBackward {
}
fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)> {
- let storage = diffuse_rs_common::core::map_dtype!(
+ let storage = diffusion_rs_common::core::map_dtype!(
"elu-bwd",
s,
|s| cpu_backend::unary_map(s, l, |v| bwd(v, self.alpha)),
@@ -102,7 +102,7 @@ impl CustomOp1 for EluWithBackward {
#[test]
fn custom_op1_with_backward() -> Result<()> {
let cpu = &Device::Cpu;
- let t = diffuse_rs_common::core::Var::new(&[-2f32, 0f32, 2f32], cpu)?;
+ let t = diffusion_rs_common::core::Var::new(&[-2f32, 0f32, 2f32], cpu)?;
let elu_t = t.apply_op1(EluWithBackward::new(2.))?;
assert_eq!(to_vec1_round(&elu_t, 4)?, &[-1.7293, 0.0, 2.0]);
@@ -113,7 +113,7 @@ fn custom_op1_with_backward() -> Result<()> {
Ok(())
}
-impl diffuse_rs_common::core::InplaceOp1 for Elu {
+impl diffusion_rs_common::core::InplaceOp1 for Elu {
fn name(&self) -> &'static str {
"elu"
}
@@ -125,7 +125,7 @@ impl diffuse_rs_common::core::InplaceOp1 for Elu {
CpuStorage::F16(s) => s.iter_mut().for_each(|v| *v = fwd(*v, alpha)),
CpuStorage::F32(s) => s.iter_mut().for_each(|v| *v = fwd(*v, alpha)),
CpuStorage::F64(s) => s.iter_mut().for_each(|v| *v = fwd(*v, alpha)),
- _ => diffuse_rs_common::bail!("unsupported dtype for inplace elu"),
+ _ => diffusion_rs_common::bail!("unsupported dtype for inplace elu"),
}
Ok(())
}
@@ -160,14 +160,14 @@ fn ug_op() -> Result<()> {
let opts: ug::lower_op::Opts = Default::default();
kernel.lower(&opts.with_global(0, 12))?
};
- let device = if diffuse_rs_common::core::utils::cuda_is_available() {
+ let device = if diffusion_rs_common::core::utils::cuda_is_available() {
Device::new_cuda(0)?
- } else if diffuse_rs_common::core::utils::metal_is_available() {
+ } else if diffusion_rs_common::core::utils::metal_is_available() {
Device::new_metal(0)?
} else {
- diffuse_rs_common::bail!("metal/cuda is mandatory for this test")
+ diffusion_rs_common::bail!("metal/cuda is mandatory for this test")
};
- let op = diffuse_rs_common::core::UgIOp1::new("test", kernel, &device)?;
+ let op = diffusion_rs_common::core::UgIOp1::new("test", kernel, &device)?;
let t = Tensor::arange(0u32, 12u32, &device)?.to_dtype(DType::F32)?;
t.inplace_op1(&op)?;
assert_eq!(
diff --git a/diffuse_rs_common/src/core/tests/display_tests.rs b/diffusion_rs_common/src/core/tests/display_tests.rs
similarity index 97%
rename from diffuse_rs_common/src/core/tests/display_tests.rs
rename to diffusion_rs_common/src/core/tests/display_tests.rs
index 8dae43c..93319a3 100644
--- a/diffuse_rs_common/src/core/tests/display_tests.rs
+++ b/diffusion_rs_common/src/core/tests/display_tests.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use diffuse_rs_common::core::{DType, Device::Cpu, Tensor};
+use diffusion_rs_common::core::{DType, Device::Cpu, Tensor};
#[test]
fn display_scalar() -> Result<()> {
diff --git a/diffuse_rs_common/src/core/tests/fortran_tensor_3d.pth b/diffusion_rs_common/src/core/tests/fortran_tensor_3d.pth
similarity index 100%
rename from diffuse_rs_common/src/core/tests/fortran_tensor_3d.pth
rename to diffusion_rs_common/src/core/tests/fortran_tensor_3d.pth
diff --git a/diffuse_rs_common/src/core/tests/grad_tests.rs b/diffusion_rs_common/src/core/tests/grad_tests.rs
similarity index 99%
rename from diffuse_rs_common/src/core/tests/grad_tests.rs
rename to diffusion_rs_common/src/core/tests/grad_tests.rs
index b06c001..644f193 100644
--- a/diffuse_rs_common/src/core/tests/grad_tests.rs
+++ b/diffusion_rs_common/src/core/tests/grad_tests.rs
@@ -1,6 +1,6 @@
#![allow(clippy::approx_constant)]
use anyhow::{Context, Result};
-use diffuse_rs_common::core::{test_device, test_utils, Device, Shape, Tensor, Var};
+use diffusion_rs_common::core::{test_device, test_utils, Device, Shape, Tensor, Var};
fn simple_grad(device: &Device) -> Result<()> {
let x = Var::new(&[3f32, 1., 4.], device)?;
diff --git a/diffuse_rs_common/src/core/tests/indexing_tests.rs b/diffusion_rs_common/src/core/tests/indexing_tests.rs
similarity index 99%
rename from diffuse_rs_common/src/core/tests/indexing_tests.rs
rename to diffusion_rs_common/src/core/tests/indexing_tests.rs
index 99fcc63..c7d9f02 100644
--- a/diffuse_rs_common/src/core/tests/indexing_tests.rs
+++ b/diffusion_rs_common/src/core/tests/indexing_tests.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use diffuse_rs_common::core::{Device, IndexOp, Tensor};
+use diffusion_rs_common::core::{Device, IndexOp, Tensor};
#[test]
fn integer_index() -> Result<()> {
diff --git a/diffuse_rs_common/src/core/tests/layout_tests.rs b/diffusion_rs_common/src/core/tests/layout_tests.rs
similarity index 100%
rename from diffuse_rs_common/src/core/tests/layout_tests.rs
rename to diffusion_rs_common/src/core/tests/layout_tests.rs
diff --git a/diffuse_rs_common/src/core/tests/matmul_tests.rs b/diffusion_rs_common/src/core/tests/matmul_tests.rs
similarity index 98%
rename from diffuse_rs_common/src/core/tests/matmul_tests.rs
rename to diffusion_rs_common/src/core/tests/matmul_tests.rs
index 07be82e..5ebf4f8 100644
--- a/diffuse_rs_common/src/core/tests/matmul_tests.rs
+++ b/diffusion_rs_common/src/core/tests/matmul_tests.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{test_device, DType, Device, IndexOp, Result, Tensor};
+use diffusion_rs_common::core::{test_device, DType, Device, IndexOp, Result, Tensor};
fn matmul(device: &Device) -> Result<()> {
let data = vec![1.0f32, 2.0, 3.0, 4.0];
diff --git a/diffuse_rs_common/src/core/tests/npy.py b/diffusion_rs_common/src/core/tests/npy.py
similarity index 100%
rename from diffuse_rs_common/src/core/tests/npy.py
rename to diffusion_rs_common/src/core/tests/npy.py
diff --git a/diffuse_rs_common/src/core/tests/pool_tests.rs b/diffusion_rs_common/src/core/tests/pool_tests.rs
similarity index 97%
rename from diffuse_rs_common/src/core/tests/pool_tests.rs
rename to diffusion_rs_common/src/core/tests/pool_tests.rs
index e706d16..1bb3804 100644
--- a/diffuse_rs_common/src/core/tests/pool_tests.rs
+++ b/diffusion_rs_common/src/core/tests/pool_tests.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{test_device, test_utils, Device, IndexOp, Result, Tensor};
+use diffusion_rs_common::core::{test_device, test_utils, Device, IndexOp, Result, Tensor};
// https://github.com/huggingface/candle/issues/364
fn avg_pool2d(dev: &Device) -> Result<()> {
diff --git a/diffuse_rs_common/src/core/tests/pth.py b/diffusion_rs_common/src/core/tests/pth.py
similarity index 100%
rename from diffuse_rs_common/src/core/tests/pth.py
rename to diffusion_rs_common/src/core/tests/pth.py
diff --git a/diffuse_rs_common/src/core/tests/pth_tests.rs b/diffusion_rs_common/src/core/tests/pth_tests.rs
similarity index 66%
rename from diffuse_rs_common/src/core/tests/pth_tests.rs
rename to diffusion_rs_common/src/core/tests/pth_tests.rs
index 0edaac1..e12f63d 100644
--- a/diffuse_rs_common/src/core/tests/pth_tests.rs
+++ b/diffusion_rs_common/src/core/tests/pth_tests.rs
@@ -1,14 +1,14 @@
/// Regression test for pth files not loading on Windows.
#[test]
fn test_pth() {
- let tensors = diffuse_rs_common::core::pickle::PthTensors::new("tests/test.pt", None).unwrap();
+ let tensors = diffusion_rs_common::core::pickle::PthTensors::new("tests/test.pt", None).unwrap();
tensors.get("test").unwrap().unwrap();
}
#[test]
fn test_pth_with_key() {
let tensors =
- diffuse_rs_common::core::pickle::PthTensors::new("tests/test_with_key.pt", Some("model_state_dict"))
+ diffusion_rs_common::core::pickle::PthTensors::new("tests/test_with_key.pt", Some("model_state_dict"))
.unwrap();
tensors.get("test").unwrap().unwrap();
}
@@ -16,7 +16,7 @@ fn test_pth_with_key() {
#[test]
fn test_pth_fortran_congiguous() {
let tensors =
- diffuse_rs_common::core::pickle::PthTensors::new("tests/fortran_tensor_3d.pth", None).unwrap();
+ diffusion_rs_common::core::pickle::PthTensors::new("tests/fortran_tensor_3d.pth", None).unwrap();
let tensor = tensors.get("tensor_fortran").unwrap().unwrap();
assert_eq!(tensor.dims3().unwrap(), (2, 3, 4));
diff --git a/diffuse_rs_common/src/core/tests/quantized_tests.rs b/diffusion_rs_common/src/core/tests/quantized_tests.rs
similarity index 99%
rename from diffuse_rs_common/src/core/tests/quantized_tests.rs
rename to diffusion_rs_common/src/core/tests/quantized_tests.rs
index 80af749..52e3053 100644
--- a/diffuse_rs_common/src/core/tests/quantized_tests.rs
+++ b/diffusion_rs_common/src/core/tests/quantized_tests.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{
+use diffusion_rs_common::core::{
bail,
quantized::{self, GgmlDType},
test_device,
diff --git a/diffuse_rs_common/src/core/tests/serialization_tests.rs b/diffusion_rs_common/src/core/tests/serialization_tests.rs
similarity index 76%
rename from diffuse_rs_common/src/core/tests/serialization_tests.rs
rename to diffusion_rs_common/src/core/tests/serialization_tests.rs
index e62c5f3..7ecea4a 100644
--- a/diffuse_rs_common/src/core/tests/serialization_tests.rs
+++ b/diffusion_rs_common/src/core/tests/serialization_tests.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{DType, Result, Tensor};
+use diffusion_rs_common::core::{DType, Result, Tensor};
struct TmpFile(std::path::PathBuf);
@@ -51,20 +51,20 @@ fn npz() -> Result<()> {
#[test]
fn safetensors() -> Result<()> {
- use diffuse_rs_common::core::safetensors::Load;
+ use diffusion_rs_common::core::safetensors::Load;
let tmp_file = TmpFile::create("st");
- let t = Tensor::arange(0f32, 24f32, &diffuse_rs_common::core::Device::Cpu)?;
+ let t = Tensor::arange(0f32, 24f32, &diffusion_rs_common::core::Device::Cpu)?;
t.save_safetensors("t", &tmp_file)?;
// Load from file.
- let st = diffuse_rs_common::core::safetensors::load(&tmp_file, &diffuse_rs_common::core::Device::Cpu)?;
+ let st = diffusion_rs_common::core::safetensors::load(&tmp_file, &diffusion_rs_common::core::Device::Cpu)?;
let t2 = st.get("t").unwrap();
let diff = (&t - t2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
assert_eq!(diff, 0f32);
// Load from bytes.
let bytes = std::fs::read(tmp_file)?;
- let st = diffuse_rs_common::core::safetensors::SliceSafetensors::new(&bytes)?;
- let t2 = st.get("t").unwrap().load(&diffuse_rs_common::core::Device::Cpu);
+ let st = diffusion_rs_common::core::safetensors::SliceSafetensors::new(&bytes)?;
+ let t2 = st.get("t").unwrap().load(&diffusion_rs_common::core::Device::Cpu);
let diff = (&t - t2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
assert_eq!(diff, 0f32);
Ok(())
diff --git a/diffuse_rs_common/src/core/tests/tensor_tests.rs b/diffusion_rs_common/src/core/tests/tensor_tests.rs
similarity index 99%
rename from diffuse_rs_common/src/core/tests/tensor_tests.rs
rename to diffusion_rs_common/src/core/tests/tensor_tests.rs
index a988bf9..8d55b24 100644
--- a/diffuse_rs_common/src/core/tests/tensor_tests.rs
+++ b/diffusion_rs_common/src/core/tests/tensor_tests.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{test_device, test_utils, DType, Device, IndexOp, Result, Tensor, D};
+use diffusion_rs_common::core::{test_device, test_utils, DType, Device, IndexOp, Result, Tensor, D};
fn zeros(device: &Device) -> Result<()> {
let tensor = Tensor::zeros((5, 2), DType::F32, device)?;
@@ -1545,7 +1545,7 @@ test_device!(zero_dim, zero_dim_cpu, zero_dim_gpu, zero_dim_metal);
fn randn_hasneg() -> Result<()> {
let t = Tensor::randn(0f32, 1f32, 200, &Device::Cpu)?.to_vec1::<f32>()?;
if t.iter().all(|&v| v >= 0.) {
- diffuse_rs_common::bail!("all values in tensors are non-negative")
+ diffusion_rs_common::bail!("all values in tensors are non-negative")
}
Ok(())
}
diff --git a/diffuse_rs_common/src/core/tests/test.npy b/diffusion_rs_common/src/core/tests/test.npy
similarity index 100%
rename from diffuse_rs_common/src/core/tests/test.npy
rename to diffusion_rs_common/src/core/tests/test.npy
diff --git a/diffuse_rs_common/src/core/tests/test.npz b/diffusion_rs_common/src/core/tests/test.npz
similarity index 100%
rename from diffuse_rs_common/src/core/tests/test.npz
rename to diffusion_rs_common/src/core/tests/test.npz
diff --git a/diffuse_rs_common/src/core/tests/test.pt b/diffusion_rs_common/src/core/tests/test.pt
similarity index 100%
rename from diffuse_rs_common/src/core/tests/test.pt
rename to diffusion_rs_common/src/core/tests/test.pt
diff --git a/diffuse_rs_common/src/core/tests/test_with_key.pt b/diffusion_rs_common/src/core/tests/test_with_key.pt
similarity index 100%
rename from diffuse_rs_common/src/core/tests/test_with_key.pt
rename to diffusion_rs_common/src/core/tests/test_with_key.pt
diff --git a/diffuse_rs_common/src/core/utils.rs b/diffusion_rs_common/src/core/utils.rs
similarity index 100%
rename from diffuse_rs_common/src/core/utils.rs
rename to diffusion_rs_common/src/core/utils.rs
diff --git a/diffuse_rs_common/src/core/variable.rs b/diffusion_rs_common/src/core/variable.rs
similarity index 100%
rename from diffuse_rs_common/src/core/variable.rs
rename to diffusion_rs_common/src/core/variable.rs
diff --git a/diffuse_rs_common/src/cuda_kernels/LICENSE b/diffusion_rs_common/src/cuda_kernels/LICENSE
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/LICENSE
rename to diffusion_rs_common/src/cuda_kernels/LICENSE
diff --git a/diffuse_rs_common/src/cuda_kernels/affine.cu b/diffusion_rs_common/src/cuda_kernels/affine.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/affine.cu
rename to diffusion_rs_common/src/cuda_kernels/affine.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/binary.cu b/diffusion_rs_common/src/cuda_kernels/binary.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/binary.cu
rename to diffusion_rs_common/src/cuda_kernels/binary.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/binary_op_macros.cuh b/diffusion_rs_common/src/cuda_kernels/binary_op_macros.cuh
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/binary_op_macros.cuh
rename to diffusion_rs_common/src/cuda_kernels/binary_op_macros.cuh
diff --git a/diffuse_rs_common/src/cuda_kernels/cast.cu b/diffusion_rs_common/src/cuda_kernels/cast.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/cast.cu
rename to diffusion_rs_common/src/cuda_kernels/cast.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/compatibility.cuh b/diffusion_rs_common/src/cuda_kernels/compatibility.cuh
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/compatibility.cuh
rename to diffusion_rs_common/src/cuda_kernels/compatibility.cuh
diff --git a/diffuse_rs_common/src/cuda_kernels/conv.cu b/diffusion_rs_common/src/cuda_kernels/conv.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/conv.cu
rename to diffusion_rs_common/src/cuda_kernels/conv.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/cuda_utils.cuh b/diffusion_rs_common/src/cuda_kernels/cuda_utils.cuh
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/cuda_utils.cuh
rename to diffusion_rs_common/src/cuda_kernels/cuda_utils.cuh
diff --git a/diffuse_rs_common/src/cuda_kernels/fill.cu b/diffusion_rs_common/src/cuda_kernels/fill.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/fill.cu
rename to diffusion_rs_common/src/cuda_kernels/fill.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/fused_rms_norm.cu b/diffusion_rs_common/src/cuda_kernels/fused_rms_norm.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/fused_rms_norm.cu
rename to diffusion_rs_common/src/cuda_kernels/fused_rms_norm.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/fused_rope.cu b/diffusion_rs_common/src/cuda_kernels/fused_rope.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/fused_rope.cu
rename to diffusion_rs_common/src/cuda_kernels/fused_rope.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/indexing.cu b/diffusion_rs_common/src/cuda_kernels/indexing.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/indexing.cu
rename to diffusion_rs_common/src/cuda_kernels/indexing.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/kvconcat.cu b/diffusion_rs_common/src/cuda_kernels/kvconcat.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/kvconcat.cu
rename to diffusion_rs_common/src/cuda_kernels/kvconcat.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/mod.rs b/diffusion_rs_common/src/cuda_kernels/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/mod.rs
rename to diffusion_rs_common/src/cuda_kernels/mod.rs
diff --git a/diffuse_rs_common/src/cuda_kernels/quantized.cu b/diffusion_rs_common/src/cuda_kernels/quantized.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/quantized.cu
rename to diffusion_rs_common/src/cuda_kernels/quantized.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/reduce.cu b/diffusion_rs_common/src/cuda_kernels/reduce.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/reduce.cu
rename to diffusion_rs_common/src/cuda_kernels/reduce.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/sort.cu b/diffusion_rs_common/src/cuda_kernels/sort.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/sort.cu
rename to diffusion_rs_common/src/cuda_kernels/sort.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/ternary.cu b/diffusion_rs_common/src/cuda_kernels/ternary.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/ternary.cu
rename to diffusion_rs_common/src/cuda_kernels/ternary.cu
diff --git a/diffuse_rs_common/src/cuda_kernels/unary.cu b/diffusion_rs_common/src/cuda_kernels/unary.cu
similarity index 100%
rename from diffuse_rs_common/src/cuda_kernels/unary.cu
rename to diffusion_rs_common/src/cuda_kernels/unary.cu
diff --git a/diffuse_rs_common/src/lib.rs b/diffusion_rs_common/src/lib.rs
similarity index 100%
rename from diffuse_rs_common/src/lib.rs
rename to diffusion_rs_common/src/lib.rs
diff --git a/diffuse_rs_common/src/metal_kernels/LICENSE b/diffusion_rs_common/src/metal_kernels/LICENSE
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/LICENSE
rename to diffusion_rs_common/src/metal_kernels/LICENSE
diff --git a/diffuse_rs_common/src/metal_kernels/affine.metal b/diffusion_rs_common/src/metal_kernels/affine.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/affine.metal
rename to diffusion_rs_common/src/metal_kernels/affine.metal
diff --git a/diffuse_rs_common/src/metal_kernels/binary.metal b/diffusion_rs_common/src/metal_kernels/binary.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/binary.metal
rename to diffusion_rs_common/src/metal_kernels/binary.metal
diff --git a/diffuse_rs_common/src/metal_kernels/cast.metal b/diffusion_rs_common/src/metal_kernels/cast.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/cast.metal
rename to diffusion_rs_common/src/metal_kernels/cast.metal
diff --git a/diffuse_rs_common/src/metal_kernels/conv.metal b/diffusion_rs_common/src/metal_kernels/conv.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/conv.metal
rename to diffusion_rs_common/src/metal_kernels/conv.metal
diff --git a/diffuse_rs_common/src/metal_kernels/fill.metal b/diffusion_rs_common/src/metal_kernels/fill.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/fill.metal
rename to diffusion_rs_common/src/metal_kernels/fill.metal
diff --git a/diffuse_rs_common/src/metal_kernels/indexing.metal b/diffusion_rs_common/src/metal_kernels/indexing.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/indexing.metal
rename to diffusion_rs_common/src/metal_kernels/indexing.metal
diff --git a/diffuse_rs_common/src/metal_kernels/libMetalFlashAttention.metallib b/diffusion_rs_common/src/metal_kernels/libMetalFlashAttention.metallib
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/libMetalFlashAttention.metallib
rename to diffusion_rs_common/src/metal_kernels/libMetalFlashAttention.metallib
diff --git a/diffuse_rs_common/src/metal_kernels/mlx_gemm.metal b/diffusion_rs_common/src/metal_kernels/mlx_gemm.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/mlx_gemm.metal
rename to diffusion_rs_common/src/metal_kernels/mlx_gemm.metal
diff --git a/diffuse_rs_common/src/metal_kernels/mod.rs b/diffusion_rs_common/src/metal_kernels/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/mod.rs
rename to diffusion_rs_common/src/metal_kernels/mod.rs
diff --git a/diffuse_rs_common/src/metal_kernels/quantized.metal b/diffusion_rs_common/src/metal_kernels/quantized.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/quantized.metal
rename to diffusion_rs_common/src/metal_kernels/quantized.metal
diff --git a/diffuse_rs_common/src/metal_kernels/random.metal b/diffusion_rs_common/src/metal_kernels/random.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/random.metal
rename to diffusion_rs_common/src/metal_kernels/random.metal
diff --git a/diffuse_rs_common/src/metal_kernels/reduce.metal b/diffusion_rs_common/src/metal_kernels/reduce.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/reduce.metal
rename to diffusion_rs_common/src/metal_kernels/reduce.metal
diff --git a/diffuse_rs_common/src/metal_kernels/scaled_dot_product_attention.metal b/diffusion_rs_common/src/metal_kernels/scaled_dot_product_attention.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/scaled_dot_product_attention.metal
rename to diffusion_rs_common/src/metal_kernels/scaled_dot_product_attention.metal
diff --git a/diffuse_rs_common/src/metal_kernels/sort.metal b/diffusion_rs_common/src/metal_kernels/sort.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/sort.metal
rename to diffusion_rs_common/src/metal_kernels/sort.metal
diff --git a/diffuse_rs_common/src/metal_kernels/ternary.metal b/diffusion_rs_common/src/metal_kernels/ternary.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/ternary.metal
rename to diffusion_rs_common/src/metal_kernels/ternary.metal
diff --git a/diffuse_rs_common/src/metal_kernels/tests.rs b/diffusion_rs_common/src/metal_kernels/tests.rs
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/tests.rs
rename to diffusion_rs_common/src/metal_kernels/tests.rs
diff --git a/diffuse_rs_common/src/metal_kernels/unary.metal b/diffusion_rs_common/src/metal_kernels/unary.metal
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/unary.metal
rename to diffusion_rs_common/src/metal_kernels/unary.metal
diff --git a/diffuse_rs_common/src/metal_kernels/utils.rs b/diffusion_rs_common/src/metal_kernels/utils.rs
similarity index 100%
rename from diffuse_rs_common/src/metal_kernels/utils.rs
rename to diffusion_rs_common/src/metal_kernels/utils.rs
diff --git a/diffuse_rs_common/src/model_source.rs b/diffusion_rs_common/src/model_source.rs
similarity index 99%
rename from diffuse_rs_common/src/model_source.rs
rename to diffusion_rs_common/src/model_source.rs
index 7fc777c..8fb2835 100644
--- a/diffuse_rs_common/src/model_source.rs
+++ b/diffusion_rs_common/src/model_source.rs
@@ -55,7 +55,7 @@ impl ModelSource {
/// with the same [base model](https://huggingface.co/black-forest-labs/FLUX.1-dev) as the original model ID.
///
/// ```rust
- /// use diffuse_rs_common::ModelSource;
+ /// use diffusion_rs_common::ModelSource;
///
/// let _ = ModelSource::from_model_id("black-forest-labs/FLUX.1-dev")
/// .override_transformer_model_id("sayakpaul/flux.1-dev-nf4-with-bnb-integration")?;
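
The doc-test above is cut off by the hunk boundary; for reference, a self-contained version of the same call sequence might look like the sketch below (the `anyhow` wrapper is an assumption for brevity, not part of this crate):

```rust
use diffusion_rs_common::ModelSource;

fn main() -> anyhow::Result<()> {
    // Same base model; transformer weights overridden with an NF4-quantized repo.
    let source = ModelSource::from_model_id("black-forest-labs/FLUX.1-dev")
        .override_transformer_model_id("sayakpaul/flux.1-dev-nf4-with-bnb-integration")?;
    let _ = source;
    Ok(())
}
```
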
diff --git a/diffuse_rs_common/src/nn/LICENSE b/diffusion_rs_common/src/nn/LICENSE
similarity index 100%
rename from diffuse_rs_common/src/nn/LICENSE
rename to diffusion_rs_common/src/nn/LICENSE
diff --git a/diffuse_rs_common/src/nn/activation.rs b/diffusion_rs_common/src/nn/activation.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/activation.rs
rename to diffusion_rs_common/src/nn/activation.rs
diff --git a/diffuse_rs_common/src/nn/attention.rs b/diffusion_rs_common/src/nn/attention.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/attention.rs
rename to diffusion_rs_common/src/nn/attention.rs
diff --git a/diffuse_rs_common/src/nn/batch_norm.rs b/diffusion_rs_common/src/nn/batch_norm.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/batch_norm.rs
rename to diffusion_rs_common/src/nn/batch_norm.rs
diff --git a/diffuse_rs_common/src/nn/conv.rs b/diffusion_rs_common/src/nn/conv.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/conv.rs
rename to diffusion_rs_common/src/nn/conv.rs
diff --git a/diffuse_rs_common/src/nn/embedding.rs b/diffusion_rs_common/src/nn/embedding.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/embedding.rs
rename to diffusion_rs_common/src/nn/embedding.rs
diff --git a/diffuse_rs_common/src/nn/encoding.rs b/diffusion_rs_common/src/nn/encoding.rs
similarity index 93%
rename from diffuse_rs_common/src/nn/encoding.rs
rename to diffusion_rs_common/src/nn/encoding.rs
index 5875f9c..e5826b5 100644
--- a/diffuse_rs_common/src/nn/encoding.rs
+++ b/diffusion_rs_common/src/nn/encoding.rs
@@ -33,10 +33,10 @@ use crate::core::{DType, Result, Tensor, WithDType};
/// ## One-hot encoding
///
/// ```rust
-/// use diffuse_rs_common::core::{Shape, Tensor, Device};
-/// use diffuse_rs_common::nn::encoding::one_hot;
+/// use diffusion_rs_common::core::{Shape, Tensor, Device};
+/// use diffusion_rs_common::nn::encoding::one_hot;
///
-/// let device = diffuse_rs_common::core::Device::Cpu;
+/// let device = diffusion_rs_common::core::Device::Cpu;
///
/// let indices = Tensor::new(vec![vec![0i64, 2], vec![1, -1]], &device).unwrap();
/// let depth = 4;
@@ -56,11 +56,11 @@ use crate::core::{DType, Result, Tensor, WithDType};
/// ## One-cold Encoding
///
/// ```rust
-/// use diffuse_rs_common::core::{Shape, Tensor, Device};
-/// use diffuse_rs_common::nn::encoding::one_hot;
+/// use diffusion_rs_common::core::{Shape, Tensor, Device};
+/// use diffusion_rs_common::nn::encoding::one_hot;
///
///
-/// let device = diffuse_rs_common::core::Device::Cpu;
+/// let device = diffusion_rs_common::core::Device::Cpu;
/// let depth = 4;
/// let indices = Tensor::new(vec![vec![0u8, 2], vec![1, 3]], &device).unwrap();
/// let one_cold = one_hot(indices, depth, 0u8, 1u8).unwrap();
diff --git a/diffuse_rs_common/src/nn/func.rs b/diffusion_rs_common/src/nn/func.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/func.rs
rename to diffusion_rs_common/src/nn/func.rs
diff --git a/diffuse_rs_common/src/nn/group_norm.rs b/diffusion_rs_common/src/nn/group_norm.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/group_norm.rs
rename to diffusion_rs_common/src/nn/group_norm.rs
diff --git a/diffuse_rs_common/src/nn/init.rs b/diffusion_rs_common/src/nn/init.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/init.rs
rename to diffusion_rs_common/src/nn/init.rs
diff --git a/diffuse_rs_common/src/nn/kv_cache.rs b/diffusion_rs_common/src/nn/kv_cache.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/kv_cache.rs
rename to diffusion_rs_common/src/nn/kv_cache.rs
diff --git a/diffuse_rs_common/src/nn/layer_norm.rs b/diffusion_rs_common/src/nn/layer_norm.rs
similarity index 98%
rename from diffuse_rs_common/src/nn/layer_norm.rs
rename to diffusion_rs_common/src/nn/layer_norm.rs
index c9fbb01..0a04cd5 100644
--- a/diffuse_rs_common/src/nn/layer_norm.rs
+++ b/diffusion_rs_common/src/nn/layer_norm.rs
@@ -7,9 +7,9 @@
//! # Example
//!
//! ```rust
-//! use diffuse_rs_common::core::{Tensor, Device::Cpu, test_utils::to_vec3_round};
-//! use diffuse_rs_common::nn::{LayerNorm, Module};
-//! # fn main() -> diffuse_rs_common::core::Result<()> {
+//! use diffusion_rs_common::core::{Tensor, Device::Cpu, test_utils::to_vec3_round};
+//! use diffusion_rs_common::nn::{LayerNorm, Module};
+//! # fn main() -> diffusion_rs_common::core::Result<()> {
//!
//! let w = Tensor::new(&[1f32, 1f32, 1f32], &Cpu)?;
//! let b = Tensor::new(&[0f32, 0f32, 0f32], &Cpu)?;
diff --git a/diffuse_rs_common/src/nn/linear.rs b/diffusion_rs_common/src/nn/linear.rs
similarity index 94%
rename from diffuse_rs_common/src/nn/linear.rs
rename to diffusion_rs_common/src/nn/linear.rs
index 8a34b88..003239d 100644
--- a/diffuse_rs_common/src/nn/linear.rs
+++ b/diffusion_rs_common/src/nn/linear.rs
@@ -6,9 +6,9 @@
//! output has shape `(b_sz, out_c)` and `(out_c,)` respectively.
//!
//! ```rust
-//! use diffuse_rs_common::core::{Tensor, Device::Cpu};
-//! use diffuse_rs_common::nn::{Linear, Module};
-//! # fn main() -> diffuse_rs_common::core::Result<()> {
+//! use diffusion_rs_common::core::{Tensor, Device::Cpu};
+//! use diffusion_rs_common::nn::{Linear, Module};
+//! # fn main() -> diffusion_rs_common::core::Result<()> {
//!
//! let w = Tensor::new(&[[1f32, 2.], [3., 4.], [5., 6.]], &Cpu)?;
//! let layer = Linear::new(w, None); // Use no bias.
diff --git a/diffuse_rs_common/src/nn/loss.rs b/diffusion_rs_common/src/nn/loss.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/loss.rs
rename to diffusion_rs_common/src/nn/loss.rs
diff --git a/diffuse_rs_common/src/nn/mod.rs b/diffusion_rs_common/src/nn/mod.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/mod.rs
rename to diffusion_rs_common/src/nn/mod.rs
diff --git a/diffuse_rs_common/src/nn/ops.rs b/diffusion_rs_common/src/nn/ops.rs
similarity index 99%
rename from diffuse_rs_common/src/nn/ops.rs
rename to diffusion_rs_common/src/nn/ops.rs
index bc90d6a..24eb8aa 100644
--- a/diffuse_rs_common/src/nn/ops.rs
+++ b/diffusion_rs_common/src/nn/ops.rs
@@ -8,16 +8,16 @@ use rayon::prelude::*;
/// a slice of fixed index on dimension `dim` are between 0 and 1 and sum to 1.
///
/// ```rust
-/// use diffuse_rs_common::core::{Tensor, Device, test_utils::to_vec2_round};
+/// use diffusion_rs_common::core::{Tensor, Device, test_utils::to_vec2_round};
/// let a = Tensor::new(&[[0f32, 1., 0., 1.], [-2., 2., 3., -3.]], &Device::Cpu)?;
-/// let a = diffuse_rs_common::nn::ops::softmax(&a, 1)?;
+/// let a = diffusion_rs_common::nn::ops::softmax(&a, 1)?;
/// assert_eq!(
/// to_vec2_round(&a, 4)?,
/// &[
/// [0.1345, 0.3655, 0.1345, 0.3655],
/// [0.0049, 0.2671, 0.7262, 0.0018]
/// ]);
-/// # Ok::<(), diffuse_rs_common::core::Error>(())
+/// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn softmax<D: crate::core::shape::Dim>(xs: &Tensor, dim: D) -> Result<Tensor> {
let dim = dim.to_index(xs.shape(), "softmax")?;
@@ -722,7 +722,7 @@ impl crate::core::CustomOp2 for AttnSoftmaxLastDim {
/// Softmax with fused broadcast addition of a mask and scale.
/// Equivalent to:
/// ```ignore
-/// diffuse_rs_common::nn::ops::softmax_last_dim(&(xs.broadcast_add(&mask)? * scale as f64)?)?
+/// diffusion_rs_common::nn::ops::softmax_last_dim(&(xs.broadcast_add(&mask)? * scale as f64)?)?
/// ```
/// - `xs` must be a rank-4 tensor
/// - `mask` must be a rank-2 matrix
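
To make the documented equivalence concrete, here is a sketch of the unfused reference path with shapes satisfying the two constraints above (the fused entry point lies outside this hunk, so only the reference expression is exercised; shapes and scale are illustrative):

```rust
use diffusion_rs_common::core::{DType, Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    // xs is rank-4, mask is rank-2 and broadcasts over the trailing dims.
    let xs = Tensor::randn(0f32, 1f32, (1, 2, 4, 4), &Device::Cpu)?;
    let mask = Tensor::zeros((4, 4), DType::F32, &Device::Cpu)?;
    let scale = 0.125f32;

    // The unfused computation that the fused op is documented to match.
    let ys = diffusion_rs_common::nn::ops::softmax_last_dim(
        &(xs.broadcast_add(&mask)? * scale as f64)?,
    )?;
    assert_eq!(ys.dims(), &[1, 2, 4, 4]);
    Ok(())
}
```
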
diff --git a/diffuse_rs_common/src/nn/optim.rs b/diffusion_rs_common/src/nn/optim.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/optim.rs
rename to diffusion_rs_common/src/nn/optim.rs
diff --git a/diffuse_rs_common/src/nn/rnn.rs b/diffusion_rs_common/src/nn/rnn.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/rnn.rs
rename to diffusion_rs_common/src/nn/rnn.rs
diff --git a/diffuse_rs_common/src/nn/rope.rs b/diffusion_rs_common/src/nn/rope.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/rope.rs
rename to diffusion_rs_common/src/nn/rope.rs
diff --git a/diffuse_rs_common/src/nn/rotary_emb.rs b/diffusion_rs_common/src/nn/rotary_emb.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/rotary_emb.rs
rename to diffusion_rs_common/src/nn/rotary_emb.rs
diff --git a/diffuse_rs_common/src/nn/sequential.rs b/diffusion_rs_common/src/nn/sequential.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/sequential.rs
rename to diffusion_rs_common/src/nn/sequential.rs
diff --git a/diffuse_rs_common/src/nn/tests/batch_norm.rs b/diffusion_rs_common/src/nn/tests/batch_norm.rs
similarity index 98%
rename from diffuse_rs_common/src/nn/tests/batch_norm.rs
rename to diffusion_rs_common/src/nn/tests/batch_norm.rs
index 2c033ad..84710d1 100644
--- a/diffuse_rs_common/src/nn/tests/batch_norm.rs
+++ b/diffusion_rs_common/src/nn/tests/batch_norm.rs
@@ -6,7 +6,7 @@ extern crate accelerate_src;
use anyhow::Result;
use crate::core::{test_utils, DType, Device, Tensor};
-use diffuse_rs_common::nn::{batch_norm, BatchNorm, BatchNormConfig, VarBuilder, VarMap};
+use diffusion_rs_common::nn::{batch_norm, BatchNorm, BatchNormConfig, VarBuilder, VarMap};
/* The test below has been generated using the following PyTorch code:
import torch
diff --git a/diffuse_rs_common/src/nn/tests/group_norm.rs b/diffusion_rs_common/src/nn/tests/group_norm.rs
similarity index 98%
rename from diffuse_rs_common/src/nn/tests/group_norm.rs
rename to diffusion_rs_common/src/nn/tests/group_norm.rs
index 1d8d5f5..0be76a1 100644
--- a/diffuse_rs_common/src/nn/tests/group_norm.rs
+++ b/diffusion_rs_common/src/nn/tests/group_norm.rs
@@ -27,7 +27,7 @@ extern crate accelerate_src;
use anyhow::Result;
use crate::core::test_utils::to_vec3_round;
use crate::core::{Device, Tensor};
-use diffuse_rs_common::nn::{GroupNorm, Module};
+use diffusion_rs_common::nn::{GroupNorm, Module};
#[test]
fn group_norm() -> Result<()> {
diff --git a/diffuse_rs_common/src/nn/tests/kv_cache.rs b/diffusion_rs_common/src/nn/tests/kv_cache.rs
similarity index 96%
rename from diffuse_rs_common/src/nn/tests/kv_cache.rs
rename to diffusion_rs_common/src/nn/tests/kv_cache.rs
index 73fcb64..0e94175 100644
--- a/diffuse_rs_common/src/nn/tests/kv_cache.rs
+++ b/diffusion_rs_common/src/nn/tests/kv_cache.rs
@@ -8,7 +8,7 @@ use crate::core::{Device, Result, Tensor};
#[test]
fn kv_cache() -> Result<()> {
- let mut cache = diffuse_rs_common::nn::kv_cache::Cache::new(0, 16);
+ let mut cache = diffusion_rs_common::nn::kv_cache::Cache::new(0, 16);
for _ in [0, 1] {
assert_eq!(cache.current_seq_len(), 0);
let data = cache.current_data()?;
@@ -33,7 +33,7 @@ fn kv_cache() -> Result<()> {
#[test]
fn rotating_kv_cache() -> Result<()> {
- let mut cache = diffuse_rs_common::nn::kv_cache::RotatingCache::new(0, 6);
+ let mut cache = diffusion_rs_common::nn::kv_cache::RotatingCache::new(0, 6);
for _ in [0, 1] {
assert_eq!(cache.offset(), 0);
assert_eq!(cache.current_seq_len(), 0);
diff --git a/diffuse_rs_common/src/nn/tests/layer_norm.rs b/diffusion_rs_common/src/nn/tests/layer_norm.rs
similarity index 97%
rename from diffuse_rs_common/src/nn/tests/layer_norm.rs
rename to diffusion_rs_common/src/nn/tests/layer_norm.rs
index e09704d..487dfd3 100644
--- a/diffuse_rs_common/src/nn/tests/layer_norm.rs
+++ b/diffusion_rs_common/src/nn/tests/layer_norm.rs
@@ -6,7 +6,7 @@ extern crate accelerate_src;
use anyhow::Result;
use crate::core::{test_utils, Device, Tensor};
-use diffuse_rs_common::nn::{LayerNorm, Module};
+use diffusion_rs_common::nn::{LayerNorm, Module};
#[test]
fn layer_norm() -> Result<()> {
diff --git a/diffuse_rs_common/src/nn/tests/loss.rs b/diffusion_rs_common/src/nn/tests/loss.rs
similarity index 87%
rename from diffuse_rs_common/src/nn/tests/loss.rs
rename to diffusion_rs_common/src/nn/tests/loss.rs
index 54947c8..99a1a9c 100644
--- a/diffuse_rs_common/src/nn/tests/loss.rs
+++ b/diffusion_rs_common/src/nn/tests/loss.rs
@@ -32,10 +32,10 @@ fn nll_and_cross_entropy() -> Result<()> {
)?;
let target = Tensor::new(&[1u32, 0, 4], &cpu)?;
- let log_softmax = diffuse_rs_common::nn::ops::log_softmax(&input, 1)?;
- let loss = diffuse_rs_common::nn::loss::nll(&log_softmax, &target)?;
+ let log_softmax = diffusion_rs_common::nn::ops::log_softmax(&input, 1)?;
+ let loss = diffusion_rs_common::nn::loss::nll(&log_softmax, &target)?;
assert_eq!(to_vec0_round(&loss, 4)?, 1.1312);
- let loss = diffuse_rs_common::nn::loss::cross_entropy(&input, &target)?;
+ let loss = diffusion_rs_common::nn::loss::cross_entropy(&input, &target)?;
assert_eq!(to_vec0_round(&loss, 4)?, 1.1312);
Ok(())
}
@@ -81,7 +81,7 @@ fn binary_cross_entropy_with_logit() -> Result<()> {
let inp = Tensor::new(&inp, &cpu)?;
let target = Tensor::new(&target, &cpu)?;
- let loss = diffuse_rs_common::nn::loss::binary_cross_entropy_with_logit(&inp, &target)?;
+ let loss = diffusion_rs_common::nn::loss::binary_cross_entropy_with_logit(&inp, &target)?;
assert_eq!(to_vec0_round(&loss, 4)?, 0.8224);
Ok(())
diff --git a/diffuse_rs_common/src/nn/tests/one_hot.rs b/diffusion_rs_common/src/nn/tests/one_hot.rs
similarity index 98%
rename from diffuse_rs_common/src/nn/tests/one_hot.rs
rename to diffusion_rs_common/src/nn/tests/one_hot.rs
index 022de9c..00a2264 100644
--- a/diffuse_rs_common/src/nn/tests/one_hot.rs
+++ b/diffusion_rs_common/src/nn/tests/one_hot.rs
@@ -1,5 +1,5 @@
use crate::core::{Result, Shape, Tensor};
-use diffuse_rs_common::nn::encoding::one_hot;
+use diffusion_rs_common::nn::encoding::one_hot;
#[test]
fn test_i64_one_hot() -> Result<()> {
diff --git a/diffuse_rs_common/src/nn/tests/ops.rs b/diffusion_rs_common/src/nn/tests/ops.rs
similarity index 84%
rename from diffuse_rs_common/src/nn/tests/ops.rs
rename to diffusion_rs_common/src/nn/tests/ops.rs
index 5cec5fd..8139b54 100644
--- a/diffuse_rs_common/src/nn/tests/ops.rs
+++ b/diffusion_rs_common/src/nn/tests/ops.rs
@@ -9,9 +9,9 @@ use crate::core::{test_device, test_utils::to_vec3_round, Device, Result, Tensor
fn softmax(device: &Device) -> Result<()> {
let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]];
let tensor = Tensor::new(data, device)?;
- let t0 = diffuse_rs_common::nn::ops::softmax(&tensor.log()?, 0)?;
- let t1 = diffuse_rs_common::nn::ops::softmax(&tensor.log()?, 1)?;
- let t2 = diffuse_rs_common::nn::ops::softmax(&tensor.log()?, 2)?;
+ let t0 = diffusion_rs_common::nn::ops::softmax(&tensor.log()?, 0)?;
+ let t1 = diffusion_rs_common::nn::ops::softmax(&tensor.log()?, 1)?;
+ let t2 = diffusion_rs_common::nn::ops::softmax(&tensor.log()?, 2)?;
assert_eq!(
to_vec3_round(&t0, 4)?,
&[
@@ -39,7 +39,7 @@ fn softmax(device: &Device) -> Result<()> {
[[0.2, 0.1, 0.7], [0.4444, 0.1111, 0.4444]]
]
);
- let t2 = diffuse_rs_common::nn::ops::softmax_last_dim(&tensor.log()?)?;
+ let t2 = diffusion_rs_common::nn::ops::softmax_last_dim(&tensor.log()?)?;
assert_eq!(
to_vec3_round(&t2, 4)?,
&[
@@ -55,7 +55,7 @@ fn softmax(device: &Device) -> Result<()> {
fn inplace_softmax(device: &Device) -> Result<()> {
let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]];
let mut tensor = Tensor::new(data, device)?.log()?;
- diffuse_rs_common::nn::ops::inplace_softmax_last_dim(&mut tensor)?;
+ diffusion_rs_common::nn::ops::inplace_softmax_last_dim(&mut tensor)?;
assert_eq!(
to_vec3_round(&tensor, 4)?,
&[
@@ -72,7 +72,7 @@ fn rms_norm(device: &Device) -> Result<()> {
let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]];
let tensor = Tensor::new(data, device)?;
let alpha = Tensor::new(&[1f32, 2f32, 3f32], device)?;
- let t = diffuse_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?;
+ let t = diffusion_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?;
assert_eq!(
to_vec3_round(&t, 4)?,
&[
@@ -80,7 +80,7 @@ fn rms_norm(device: &Device) -> Result<()> {
[[0.4714, 0.4714, 4.9497], [1.206, 0.603, 3.6181]]
]
);
- let t2 = diffuse_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?;
+ let t2 = diffusion_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?;
assert_eq!(
to_vec3_round(&t2, 4)?,
&[
@@ -102,8 +102,8 @@ fn rms_norml(device: &Device) -> Result<()> {
let src: Vec<f32> = (0..el_count).map(|_| rng.gen::<f32>()).collect();
let tensor = Tensor::new(src, device)?.reshape((b_size, seq_len, head_dim))?;
let alpha = Tensor::ones(head_dim, crate::core::DType::F32, device)?;
- let t = diffuse_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?;
- let t2 = diffuse_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?;
+ let t = diffusion_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?;
+ let t2 = diffusion_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?;
let diff = (t - t2)?
.abs()?
.flatten_all()?
@@ -119,7 +119,7 @@ fn layer_norm(device: &Device) -> Result<()> {
let tensor = Tensor::new(data, device)?;
let alpha = Tensor::new(&[1f32, 2f32, 3f32], device)?;
let beta = Tensor::new(&[0.5f32, 0f32, -0.2f32], device)?;
- let t = diffuse_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?;
+ let t = diffusion_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?;
assert_eq!(
to_vec3_round(&t, 4)?,
&[
@@ -127,7 +127,7 @@ fn layer_norm(device: &Device) -> Result<()> {
[[-0.008, -1.778, 3.991], [1.2071, -2.8284, 1.9213]]
]
);
- let t2 = diffuse_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?;
+ let t2 = diffusion_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?;
assert_eq!(
to_vec3_round(&t2, 4)?,
&[
@@ -150,8 +150,8 @@ fn layer_norml(device: &Device) -> Result<()> {
let tensor = Tensor::new(src, device)?.reshape((b_size, seq_len, head_dim))?;
let alpha = Tensor::ones(head_dim, crate::core::DType::F32, device)?;
let beta = Tensor::zeros(head_dim, crate::core::DType::F32, device)?;
- let t = diffuse_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?;
- let t2 = diffuse_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?;
+ let t = diffusion_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?;
+ let t2 = diffusion_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?;
let diff = (t - t2)?
.abs()?
.flatten_all()?
@@ -166,7 +166,7 @@ fn layer_norml(device: &Device) -> Result<()> {
fn softmax_numerical_stability() -> Result<()> {
let dev = &Device::Cpu;
let xs = Tensor::new(&[1234f32, 0.], dev)?;
- let softmax = diffuse_rs_common::nn::ops::softmax(&xs, 0)?;
+ let softmax = diffusion_rs_common::nn::ops::softmax(&xs, 0)?;
assert_eq!(softmax.to_vec1::<f32>()?, &[1f32, 0.]);
Ok(())
}
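With an input of 1234, a naive `exp` overflows `f32`; this test only passes because of the standard max-subtraction form, which is mathematically equivalent:

```latex
\mathrm{softmax}(x)_i = \frac{e^{x_i - m}}{\sum_k e^{x_k - m}},
\qquad m = \max_j x_j
```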
@@ -187,8 +187,8 @@ fn ropei(device: &Device) -> Result<()> {
let src = Tensor::from_vec(src, (b_size, num_head, seq_len, head_dim), device)?;
let cos = Tensor::from_vec(cos, (seq_len, head_dim / 2), device)?;
let sin = Tensor::from_vec(sin, (seq_len, head_dim / 2), device)?;
- let rope1 = diffuse_rs_common::nn::rotary_emb::rope_i(&src, &cos, &sin)?;
- let rope2 = diffuse_rs_common::nn::rotary_emb::rope_i_slow(&src, &cos, &sin)?;
+ let rope1 = diffusion_rs_common::nn::rotary_emb::rope_i(&src, &cos, &sin)?;
+ let rope2 = diffusion_rs_common::nn::rotary_emb::rope_i_slow(&src, &cos, &sin)?;
let sum_diff = (rope1 - rope2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
if device.is_cpu() {
assert_eq!(sum_diff, 0.);
@@ -214,8 +214,8 @@ fn rope(device: &Device) -> Result<()> {
let src = Tensor::from_vec(src, (b_size, num_head, seq_len, head_dim), device)?;
let cos = Tensor::from_vec(cos, (seq_len, head_dim / 2), device)?;
let sin = Tensor::from_vec(sin, (seq_len, head_dim / 2), device)?;
- let rope1 = diffuse_rs_common::nn::rotary_emb::rope(&src, &cos, &sin)?;
- let rope2 = diffuse_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?;
+ let rope1 = diffusion_rs_common::nn::rotary_emb::rope(&src, &cos, &sin)?;
+ let rope2 = diffusion_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?;
let sum_diff = (rope1 - rope2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
if device.is_cpu() {
assert_eq!(sum_diff, 0.);
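`rope` and `rope_slow` compute the same pairwise rotation and differ only in layout and fusion; for each coordinate pair (x1, x2) at position p with per-pair frequency theta:

```latex
\begin{pmatrix} x_1' \\ x_2' \end{pmatrix}
= \begin{pmatrix} \cos p\theta & -\sin p\theta \\ \sin p\theta & \cos p\theta \end{pmatrix}
\begin{pmatrix} x_1 \\ x_2 \end{pmatrix}
```

The `rope_i` variant above pairs interleaved adjacent elements, while `rope` pairs each element with its counterpart in the other half of the head dimension.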
@@ -243,9 +243,9 @@ fn rope_thd(device: &Device) -> Result<()> {
let sin = Tensor::from_vec(sin, (seq_len, head_dim / 2), device)?;
let rope1 = {
let src = src.transpose(1, 2)?.contiguous()?;
- diffuse_rs_common::nn::rotary_emb::rope_thd(&src, &cos, &sin)?.transpose(1, 2)?
+ diffusion_rs_common::nn::rotary_emb::rope_thd(&src, &cos, &sin)?.transpose(1, 2)?
};
- let rope2 = diffuse_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?;
+ let rope2 = diffusion_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?;
let sum_diff = (rope1 - rope2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
if device.is_cpu() {
assert_eq!(sum_diff, 0.);
@@ -258,7 +258,7 @@ fn rope_thd(device: &Device) -> Result<()> {
fn sigmoid(device: &Device) -> Result<()> {
let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]];
let tensor = Tensor::new(data, device)?;
- let s1 = diffuse_rs_common::nn::ops::sigmoid(&tensor)?;
+ let s1 = diffusion_rs_common::nn::ops::sigmoid(&tensor)?;
let s2 = (1. / (1. + tensor.neg()?.exp()?)?)?;
let diff = (s1 - s2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
assert_eq!(diff, 0.);
diff --git a/diffuse_rs_common/src/nn/tests/optim.rs b/diffusion_rs_common/src/nn/tests/optim.rs
similarity index 96%
rename from diffuse_rs_common/src/nn/tests/optim.rs
rename to diffusion_rs_common/src/nn/tests/optim.rs
index f7cff38..8958b36 100644
--- a/diffuse_rs_common/src/nn/tests/optim.rs
+++ b/diffusion_rs_common/src/nn/tests/optim.rs
@@ -8,7 +8,7 @@ use crate::core::test_utils::{to_vec0_round, to_vec2_round};
use anyhow::Result;
use crate::core::{DType, Device, Tensor, Var};
-use diffuse_rs_common::nn::{AdamW, Linear, Module, Optimizer, ParamsAdamW, SGD};
+use diffusion_rs_common::nn::{AdamW, Linear, Module, Optimizer, ParamsAdamW, SGD};
#[test]
fn sgd_optim() -> Result<()> {
@@ -124,7 +124,7 @@ fn adamw_linear_regression() -> Result<()> {
#[test]
fn adamw_linear_regression_varmap() -> Result<()> {
- use diffuse_rs_common::nn::Init::Const;
+ use diffusion_rs_common::nn::Init::Const;
// Similar as the previous test but using a VarMap.
let w_gen = Tensor::new(&[[3f32, 1.]], &Device::Cpu)?;
@@ -133,7 +133,7 @@ fn adamw_linear_regression_varmap() -> Result<()> {
let sample_xs = Tensor::new(&[[2f32, 1.], [7., 4.], [-4., 12.], [5., 8.]], &Device::Cpu)?;
let sample_ys = gen.forward(&sample_xs)?;
- let mut var_map = diffuse_rs_common::nn::VarMap::new();
+ let mut var_map = diffusion_rs_common::nn::VarMap::new();
let w = var_map.get((1, 2), "w", Const(0.), DType::F32, &Device::Cpu)?;
let b = var_map.get((), "b", Const(0.), DType::F32, &Device::Cpu)?;
diff --git a/diffuse_rs_common/src/nn/tests/rnn.rs b/diffusion_rs_common/src/nn/tests/rnn.rs
similarity index 91%
rename from diffuse_rs_common/src/nn/tests/rnn.rs
rename to diffusion_rs_common/src/nn/tests/rnn.rs
index d50fbcf..10c3d5c 100644
--- a/diffuse_rs_common/src/nn/tests/rnn.rs
+++ b/diffusion_rs_common/src/nn/tests/rnn.rs
@@ -5,7 +5,7 @@ extern crate intel_mkl_src;
extern crate accelerate_src;
use crate::core::{test_utils::to_vec2_round, DType, Device, Result, Tensor};
-use diffuse_rs_common::nn::RNN;
+use diffusion_rs_common::nn::RNN;
/* The following test can be verified against PyTorch using the following snippet.
import torch
@@ -42,8 +42,8 @@ fn lstm() -> Result<()> {
]
.into_iter()
.collect();
- let vb = diffuse_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu);
- let lstm = diffuse_rs_common::nn::lstm(2, 3, Default::default(), vb)?;
+ let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu);
+ let lstm = diffusion_rs_common::nn::lstm(2, 3, Default::default(), vb)?;
let mut state = lstm.zero_state(1)?;
for inp in [3f32, 1., 4., 1., 5., 9., 2.] {
let inp = Tensor::new(&[[inp, inp * 0.5]], cpu)?;
@@ -88,8 +88,8 @@ fn gru() -> Result<()> {
]
.into_iter()
.collect();
- let vb = diffuse_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu);
- let gru = diffuse_rs_common::nn::gru(2, 3, Default::default(), vb)?;
+ let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu);
+ let gru = diffusion_rs_common::nn::gru(2, 3, Default::default(), vb)?;
let mut state = gru.zero_state(1)?;
for inp in [3f32, 1., 4., 1., 5., 9., 2.] {
let inp = Tensor::new(&[[inp, inp * 0.5]], cpu)?;
diff --git a/diffuse_rs_common/src/nn/tests/sdpa.rs b/diffusion_rs_common/src/nn/tests/sdpa.rs
similarity index 83%
rename from diffuse_rs_common/src/nn/tests/sdpa.rs
rename to diffusion_rs_common/src/nn/tests/sdpa.rs
index a89a562..53c0787 100644
--- a/diffuse_rs_common/src/nn/tests/sdpa.rs
+++ b/diffusion_rs_common/src/nn/tests/sdpa.rs
@@ -20,12 +20,12 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
.to_dtype(q.dtype())?;
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
@@ -58,12 +58,12 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
.to_dtype(q.dtype())?;
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
@@ -96,12 +96,12 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
.to_dtype(q.dtype())?;
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
@@ -136,7 +136,7 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(
&att.to_dtype(DType::F32)?
.div(SOFTCAP)?
.tanh()?
@@ -146,7 +146,7 @@ mod metal_sdpa_tests {
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
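The softcap variants bound the attention logits with a scaled `tanh` before the softmax; the ground truth above corresponds to the standard softcap formulation (scale s, cap c = SOFTCAP; the multiply-back by c falls outside the shown context and is assumed):

```latex
\mathrm{Attn}(Q, K, V)
  = \mathrm{softmax}\!\Big( c \cdot \tanh\!\big( \tfrac{s\, Q K^{\top}}{c} \big) \Big) V
```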
@@ -181,7 +181,7 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(
&att.to_dtype(DType::F32)?
.div(SOFTCAP)?
.tanh()?
@@ -191,7 +191,7 @@ mod metal_sdpa_tests {
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
@@ -226,7 +226,7 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(
&att.to_dtype(DType::F32)?
.div(SOFTCAP)?
.tanh()?
@@ -236,7 +236,7 @@ mod metal_sdpa_tests {
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
@@ -269,12 +269,12 @@ mod metal_sdpa_tests {
let ground_truth = {
let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?;
- let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
+ let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)?
.to_dtype(q.dtype())?;
att.matmul(&v.clone())?
};
- let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
+ let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?;
assert_eq!(ground_truth.shape(), sdpa_output.shape());
@@ -296,9 +296,9 @@ mod metal_sdpa_tests {
let tensor = Tensor::randn(0f32, 1f32, (4, 32, 64, 64), &device)?;
let truemask = Tensor::full(f32::MIN, (64, 64), &device)?.contiguous()?;
- let ground_truth = diffuse_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?;
+ let ground_truth = diffusion_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?;
- let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?;
+ let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?;
let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)?
.sum_all()?
@@ -323,10 +323,10 @@ mod metal_sdpa_tests {
let scale = 0.1f32;
let ground_truth =
- diffuse_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)?
+ diffusion_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)?
.to_dtype(DType::F32)?;
- let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)?
+ let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)?
.to_dtype(DType::F32)?;
let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)?
@@ -348,9 +348,9 @@ mod metal_sdpa_tests {
let tensor = Tensor::randn(0f32, 1f32, (4, 32, 64, 63), &device)?;
let truemask = Tensor::full(f32::MIN, (64, 63), &device)?.contiguous()?;
- let ground_truth = diffuse_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?;
+ let ground_truth = diffusion_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?;
- let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?;
+ let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?;
let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)?
.sum_all()?
@@ -375,10 +375,10 @@ mod metal_sdpa_tests {
let scale = 0.1f32;
let ground_truth =
- diffuse_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)?
+ diffusion_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)?
.to_dtype(DType::F32)?;
- let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)?
+ let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)?
.to_dtype(DType::F32)?;
let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)?
diff --git a/diffuse_rs_common/src/nn/var_builder.rs b/diffusion_rs_common/src/nn/var_builder.rs
similarity index 99%
rename from diffuse_rs_common/src/nn/var_builder.rs
rename to diffusion_rs_common/src/nn/var_builder.rs
index 16dae4c..15d7e92 100644
--- a/diffuse_rs_common/src/nn/var_builder.rs
+++ b/diffusion_rs_common/src/nn/var_builder.rs
@@ -650,7 +650,7 @@ impl<'a> VarBuilder<'a> {
/// passing the new names to the inner VarBuilder.
///
/// ```rust
- /// use diffuse_rs_common::core::{Tensor, DType, Device};
+ /// use diffusion_rs_common::core::{Tensor, DType, Device};
///
/// let a = Tensor::arange(0f32, 6f32, &Device::Cpu)?.reshape((2, 3))?;
/// let tensors: std::collections::HashMap<_, _> = [
@@ -658,7 +658,7 @@ impl<'a> VarBuilder<'a> {
/// ]
/// .into_iter()
/// .collect();
- /// let vb = diffuse_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, &Device::Cpu);
+ /// let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, &Device::Cpu);
/// assert!(vb.contains_tensor("foo"));
/// assert!(vb.get((2, 3), "foo").is_ok());
/// assert!(!vb.contains_tensor("bar"));
@@ -668,7 +668,7 @@ impl<'a> VarBuilder<'a> {
/// assert!(vb.get((2, 3), "bar").is_ok());
/// assert!(vb.get((2, 3), "foo").is_ok());
/// assert!(!vb.contains_tensor("baz"));
- /// # Ok::<(), diffuse_rs_common::core::Error>(())
+ /// # Ok::<(), diffusion_rs_common::core::Error>(())
/// ```
pub fn rename_f<F: Fn(String) -> String + Sync + Send + 'static>(self, f: F) -> Self {
let f: Box<dyn Fn(String) -> String + Sync + Send + 'static> = Box::new(f);
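A small usage sketch of the renamed builder under the signature above; the tensor names here are hypothetical:

```rust
// Hedged sketch: resolve requested names against a prefixed checkpoint layout.
use diffusion_rs_common::core::{DType, Device, Tensor};

fn rename_demo() -> Result<(), diffusion_rs_common::core::Error> {
    let a = Tensor::arange(0f32, 6f32, &Device::Cpu)?.reshape((2, 3))?;
    let tensors: std::collections::HashMap<_, _> =
        [("model.weight".to_string(), a)].into_iter().collect();
    let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, &Device::Cpu);
    // The closure maps each requested name to the name stored in `tensors`.
    let vb = vb.rename_f(|name: String| format!("model.{name}"));
    assert!(vb.contains_tensor("weight")); // looked up as "model.weight"
    Ok(())
}
```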
diff --git a/diffuse_rs_common/src/nn/var_map.rs b/diffusion_rs_common/src/nn/var_map.rs
similarity index 100%
rename from diffuse_rs_common/src/nn/var_map.rs
rename to diffusion_rs_common/src/nn/var_map.rs
diff --git a/diffuse_rs_common/src/nn_wrap.rs b/diffusion_rs_common/src/nn_wrap.rs
similarity index 100%
rename from diffuse_rs_common/src/nn_wrap.rs
rename to diffusion_rs_common/src/nn_wrap.rs
diff --git a/diffuse_rs_common/src/progress.rs b/diffusion_rs_common/src/progress.rs
similarity index 100%
rename from diffuse_rs_common/src/progress.rs
rename to diffusion_rs_common/src/progress.rs
diff --git a/diffuse_rs_common/src/safetensors.rs b/diffusion_rs_common/src/safetensors.rs
similarity index 100%
rename from diffuse_rs_common/src/safetensors.rs
rename to diffusion_rs_common/src/safetensors.rs
diff --git a/diffuse_rs_common/src/tokenizer.rs b/diffusion_rs_common/src/tokenizer.rs
similarity index 100%
rename from diffuse_rs_common/src/tokenizer.rs
rename to diffusion_rs_common/src/tokenizer.rs
diff --git a/diffuse_rs_common/src/tokens.rs b/diffusion_rs_common/src/tokens.rs
similarity index 100%
rename from diffuse_rs_common/src/tokens.rs
rename to diffusion_rs_common/src/tokens.rs
diff --git a/diffuse_rs_common/src/varbuilder.rs b/diffusion_rs_common/src/varbuilder.rs
similarity index 100%
rename from diffuse_rs_common/src/varbuilder.rs
rename to diffusion_rs_common/src/varbuilder.rs
diff --git a/diffuse_rs_common/src/varbuilder_loading.rs b/diffusion_rs_common/src/varbuilder_loading.rs
similarity index 100%
rename from diffuse_rs_common/src/varbuilder_loading.rs
rename to diffusion_rs_common/src/varbuilder_loading.rs
diff --git a/diffuse_rs_core/Cargo.toml b/diffusion_rs_core/Cargo.toml
similarity index 59%
rename from diffuse_rs_core/Cargo.toml
rename to diffusion_rs_core/Cargo.toml
index 405bb50..330c701 100644
--- a/diffuse_rs_core/Cargo.toml
+++ b/diffusion_rs_core/Cargo.toml
@@ -1,10 +1,10 @@
[package]
-name = "diffuse_rs_core"
+name = "diffusion_rs_core"
readme.workspace = true
authors.workspace = true
version.workspace = true
edition.workspace = true
-description = "Core package of diffuse_rs"
+description = "Core package of diffusion_rs"
repository.workspace = true
keywords.workspace = true
categories.workspace = true
@@ -16,8 +16,8 @@ anyhow.workspace = true
float8.workspace = true
half.workspace = true
hf-hub.workspace = true
-diffuse_rs_backend = { path = "../diffuse_rs_backend" }
-diffuse_rs_common = { path = "../diffuse_rs_common" }
+diffusion_rs_backend = { path = "../diffusion_rs_backend" }
+diffusion_rs_common = { path = "../diffusion_rs_common" }
serde.workspace = true
serde_plain.workspace = true
serde_json.workspace = true
@@ -31,8 +31,8 @@ objc = { workspace = true, optional = true }
clap.workspace = true
[features]
-cuda = ["diffuse_rs_common/cuda", "diffuse_rs_backend/cuda"]
-cudnn = ["diffuse_rs_common/cudnn"]
-metal = ["diffuse_rs_common/metal", "diffuse_rs_backend/metal", "dep:objc"]
-accelerate = ["diffuse_rs_common/accelerate"]
-mkl = ["diffuse_rs_common/mkl"]
+cuda = ["diffusion_rs_common/cuda", "diffusion_rs_backend/cuda"]
+cudnn = ["diffusion_rs_common/cudnn"]
+metal = ["diffusion_rs_common/metal", "diffusion_rs_backend/metal", "dep:objc"]
+accelerate = ["diffusion_rs_common/accelerate"]
+mkl = ["diffusion_rs_common/mkl"]
diff --git a/diffuse_rs_core/src/lib.rs b/diffusion_rs_core/src/lib.rs
similarity index 81%
rename from diffuse_rs_core/src/lib.rs
rename to diffusion_rs_core/src/lib.rs
index 0acf129..0575831 100644
--- a/diffuse_rs_core/src/lib.rs
+++ b/diffusion_rs_core/src/lib.rs
@@ -1,11 +1,11 @@
-//! Core crate for interacting with diffuse_rs.
+//! Core crate for interacting with diffusion_rs.
//!
//! The API is intentionally straightforward but strives to provide strong flexibility.
//!
//! ```rust,no_run
//! use std::time::Instant;
//!
-//! use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
+//! use diffusion_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
//!
//! let pipeline = Pipeline::load(
//! ModelSource::dduf("FLUX.1-dev-Q4-bnb.dduf")?,
@@ -38,5 +38,5 @@
mod models;
mod pipelines;
-pub use diffuse_rs_common::{ModelSource, TokenSource};
+pub use diffusion_rs_common::{ModelSource, TokenSource};
pub use pipelines::{DiffusionGenerationParams, Offloading, Pipeline};
diff --git a/diffuse_rs_core/src/models/clip/mod.rs b/diffusion_rs_core/src/models/clip/mod.rs
similarity index 100%
rename from diffuse_rs_core/src/models/clip/mod.rs
rename to diffusion_rs_core/src/models/clip/mod.rs
diff --git a/diffuse_rs_core/src/models/clip/text.rs b/diffusion_rs_core/src/models/clip/text.rs
similarity index 78%
rename from diffuse_rs_core/src/models/clip/text.rs
rename to diffusion_rs_core/src/models/clip/text.rs
index 4fd3448..298e83c 100644
--- a/diffuse_rs_core/src/models/clip/text.rs
+++ b/diffusion_rs_core/src/models/clip/text.rs
@@ -1,7 +1,7 @@
#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
-use diffuse_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D};
-use diffuse_rs_common::nn::{ops::sigmoid, Module};
+use diffusion_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D};
+use diffusion_rs_common::nn::{ops::sigmoid, Module};
use serde::Deserialize;
#[derive(Debug, Clone, Copy, Deserialize)]
@@ -33,16 +33,19 @@ pub struct ClipTextConfig {
// TODO rewrite to be more similar to https://github.com/huggingface/transformers/blob/f6fa0f0bf0796ac66f201f23bdb8585de1609add/src/transformers/models/clip/modeling_clip.py#L142
#[derive(Clone, Debug)]
struct ClipTextEmbeddings {
- token_embedding: diffuse_rs_common::nn::Embedding,
- position_embedding: diffuse_rs_common::nn::Embedding,
+ token_embedding: diffusion_rs_common::nn::Embedding,
+ position_embedding: diffusion_rs_common::nn::Embedding,
position_ids: Tensor,
}
impl ClipTextEmbeddings {
- fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
- let token_embedding =
- diffuse_rs_common::embedding(c.vocab_size, c.projection_dim, vs.pp("token_embedding"))?;
- let position_embedding = diffuse_rs_common::embedding(
+ fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
+ let token_embedding = diffusion_rs_common::embedding(
+ c.vocab_size,
+ c.projection_dim,
+ vs.pp("token_embedding"),
+ )?;
+ let position_embedding = diffusion_rs_common::embedding(
c.max_position_embeddings,
c.projection_dim,
vs.pp("position_embedding"),
@@ -69,24 +72,24 @@ impl Module for ClipTextEmbeddings {
#[derive(Clone, Debug)]
struct ClipAttention {
- k_proj: diffuse_rs_common::nn::Linear,
- v_proj: diffuse_rs_common::nn::Linear,
- q_proj: diffuse_rs_common::nn::Linear,
- out_proj: diffuse_rs_common::nn::Linear,
+ k_proj: diffusion_rs_common::nn::Linear,
+ v_proj: diffusion_rs_common::nn::Linear,
+ q_proj: diffusion_rs_common::nn::Linear,
+ out_proj: diffusion_rs_common::nn::Linear,
head_dim: usize,
scale: f64,
num_attention_heads: usize,
}
impl ClipAttention {
- fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
+ fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
let projection_dim = c.projection_dim;
let num_attention_heads = c.num_attention_heads;
- let k_proj = diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("k_proj"))?;
- let v_proj = diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("v_proj"))?;
- let q_proj = diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("q_proj"))?;
+ let k_proj = diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("k_proj"))?;
+ let v_proj = diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("v_proj"))?;
+ let q_proj = diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("q_proj"))?;
let out_proj =
- diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("out_proj"))?;
+ diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("out_proj"))?;
let head_dim = projection_dim / num_attention_heads;
let scale = (head_dim as f64).powf(-0.5);
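The `powf(-0.5)` factor is the usual scaled dot-product normalization, applied to the logits before the softmax a few lines below:

```latex
\mathrm{Attn}(Q, K, V) = \mathrm{softmax}\!\left( \frac{Q K^{\top}}{\sqrt{d_{\mathrm{head}}}} \right) V
```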
@@ -138,7 +141,7 @@ impl ClipAttention {
attn_weights
};
- let attn_weights = diffuse_rs_common::nn::ops::softmax(&attn_weights, D::Minus1)?;
+ let attn_weights = diffusion_rs_common::nn::ops::softmax(&attn_weights, D::Minus1)?;
let attn_output = attn_weights.matmul(&value_states)?.to_dtype(in_dtype)?;
let attn_output = attn_output
@@ -151,15 +154,15 @@ impl ClipAttention {
#[derive(Clone, Debug)]
struct ClipMlp {
- fc1: diffuse_rs_common::nn::Linear,
- fc2: diffuse_rs_common::nn::Linear,
+ fc1: diffusion_rs_common::nn::Linear,
+ fc2: diffusion_rs_common::nn::Linear,
activation: Activation,
}
impl ClipMlp {
- fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
- let fc1 = diffuse_rs_common::linear(c.projection_dim, c.intermediate_size, vs.pp("fc1"))?;
- let fc2 = diffuse_rs_common::linear(c.intermediate_size, c.projection_dim, vs.pp("fc2"))?;
+ fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
+ let fc1 = diffusion_rs_common::linear(c.projection_dim, c.intermediate_size, vs.pp("fc1"))?;
+ let fc2 = diffusion_rs_common::linear(c.intermediate_size, c.projection_dim, vs.pp("fc2"))?;
Ok(ClipMlp {
fc1,
@@ -179,19 +182,19 @@ impl ClipMlp {
#[derive(Clone, Debug)]
struct ClipEncoderLayer {
self_attn: ClipAttention,
- layer_norm1: diffuse_rs_common::nn::LayerNorm,
+ layer_norm1: diffusion_rs_common::nn::LayerNorm,
mlp: ClipMlp,
- layer_norm2: diffuse_rs_common::nn::LayerNorm,
+ layer_norm2: diffusion_rs_common::nn::LayerNorm,
}
impl ClipEncoderLayer {
- fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
+ fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
let self_attn = ClipAttention::new(vs.pp("self_attn"), c)?;
let layer_norm1 =
- diffuse_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm1"))?;
+ diffusion_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm1"))?;
let mlp = ClipMlp::new(vs.pp("mlp"), c)?;
let layer_norm2 =
- diffuse_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm2"))?;
+ diffusion_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm2"))?;
Ok(ClipEncoderLayer {
self_attn,
@@ -220,7 +223,7 @@ struct ClipEncoder {
}
impl ClipEncoder {
- pub fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
+ pub fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
let vs = vs.pp("layers");
let mut layers: Vec<ClipEncoderLayer> = Vec::new();
for index in 0..c.num_hidden_layers {
@@ -244,16 +247,16 @@ impl ClipEncoder {
pub struct ClipTextTransformer {
embeddings: ClipTextEmbeddings,
encoder: ClipEncoder,
- final_layer_norm: diffuse_rs_common::nn::LayerNorm,
+ final_layer_norm: diffusion_rs_common::nn::LayerNorm,
device: Device,
}
impl ClipTextTransformer {
- pub fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
+ pub fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> {
let embeddings = ClipTextEmbeddings::new(vs.pp("embeddings"), c)?;
let encoder = ClipEncoder::new(vs.pp("encoder"), c)?;
let final_layer_norm =
- diffuse_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("final_layer_norm"))?;
+ diffusion_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("final_layer_norm"))?;
Ok(ClipTextTransformer {
embeddings,
encoder,
diff --git a/diffuse_rs_core/src/models/flux/mod.rs b/diffusion_rs_core/src/models/flux/mod.rs
similarity index 100%
rename from diffuse_rs_core/src/models/flux/mod.rs
rename to diffusion_rs_core/src/models/flux/mod.rs
diff --git a/diffuse_rs_core/src/models/flux/model.rs b/diffusion_rs_core/src/models/flux/model.rs
similarity index 90%
rename from diffuse_rs_core/src/models/flux/model.rs
rename to diffusion_rs_core/src/models/flux/model.rs
index 176585a..0544237 100644
--- a/diffuse_rs_core/src/models/flux/model.rs
+++ b/diffusion_rs_core/src/models/flux/model.rs
@@ -2,13 +2,13 @@
use std::sync::Arc;
-use diffuse_rs_backend::{QuantMethod, QuantizedConfig};
-use diffuse_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D};
-use diffuse_rs_common::nn::{layer_norm::RmsNormNonQuantized, LayerNorm, RmsNorm};
-use diffuse_rs_common::VarBuilder;
+use diffusion_rs_backend::{QuantMethod, QuantizedConfig};
+use diffusion_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D};
+use diffusion_rs_common::nn::{layer_norm::RmsNormNonQuantized, LayerNorm, RmsNorm};
+use diffusion_rs_common::VarBuilder;
use serde::Deserialize;
-use diffuse_rs_common::NiceProgressBar;
+use diffusion_rs_common::NiceProgressBar;
use tracing::{span, Span};
use crate::models::{QuantizedModel, QuantizedModelLayer};
@@ -40,7 +40,7 @@ fn layer_norm(dim: usize, vb: VarBuilder) -> Result<LayerNorm> {
fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Tensor> {
let dim = q.dim(D::Minus1)?;
let scale_factor = 1.0 / (dim as f64).sqrt();
- diffuse_rs_backend::ops::sdpa(
+ diffusion_rs_backend::ops::sdpa(
&q.to_dtype(DType::F32)?,
&k.to_dtype(DType::F32)?,
&v.to_dtype(DType::F32)?,
@@ -56,7 +56,7 @@ fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Tensor>
fn rope(pos: &Tensor, dim: usize, theta: usize) -> Result<Tensor> {
if dim % 2 == 1 {
- diffuse_rs_common::bail!("dim {dim} is odd")
+ diffusion_rs_common::bail!("dim {dim} is odd")
}
let dev = pos.device();
let theta = theta as f64;
@@ -105,17 +105,17 @@ fn timestep_embedding(t: &Tensor, dim: usize, dtype: DType) -> Result<Tensor> {
const TIME_FACTOR: f64 = 1000.;
const MAX_PERIOD: f64 = 10000.;
if dim % 2 == 1 {
- diffuse_rs_common::bail!("{dim} is odd")
+ diffusion_rs_common::bail!("{dim} is odd")
}
let dev = t.device();
let half = dim / 2;
let t = (t * TIME_FACTOR)?;
let arange =
- Tensor::arange(0, half as u32, dev)?.to_dtype(diffuse_rs_common::core::DType::F32)?;
+ Tensor::arange(0, half as u32, dev)?.to_dtype(diffusion_rs_common::core::DType::F32)?;
let freqs = (arange * (-MAX_PERIOD.ln() / half as f64))?.exp()?;
let args = t
.unsqueeze(1)?
- .to_dtype(diffuse_rs_common::core::DType::F32)?
+ .to_dtype(diffusion_rs_common::core::DType::F32)?
.broadcast_mul(&freqs.unsqueeze(0)?)?;
let emb = Tensor::cat(&[args.cos()?, args.sin()?], D::Minus1)?.to_dtype(dtype)?;
Ok(emb)
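In closed form this is the standard sinusoidal timestep encoding, with `t` pre-scaled by `TIME_FACTOR = 1000` and `half = dim / 2`:

```latex
\omega_i = \exp\!\left( -\frac{i \ln 10000}{\mathrm{half}} \right) = 10000^{-i/\mathrm{half}},
\qquad
\mathrm{emb}(t) = \big[\, \cos(1000\, t\, \omega_i) \,\big\|\, \sin(1000\, t\, \omega_i) \,\big]_{i=0}^{\mathrm{half}-1}
```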
@@ -139,7 +139,7 @@ impl EmbedNd {
}
}
-impl diffuse_rs_common::core::Module for EmbedNd {
+impl diffusion_rs_common::core::Module for EmbedNd {
fn forward(&self, ids: &Tensor) -> Result<Tensor> {
let n_axes = ids.dim(D::Minus1)?;
let mut emb = Vec::with_capacity(n_axes);
@@ -165,9 +165,9 @@ pub struct MlpEmbedder {
impl MlpEmbedder {
fn new(in_sz: usize, h_sz: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> {
let in_layer =
- diffuse_rs_backend::linear(in_sz, h_sz, &cfg.quantization_config, vb.pp("linear_1"))?;
+ diffusion_rs_backend::linear(in_sz, h_sz, &cfg.quantization_config, vb.pp("linear_1"))?;
let out_layer =
- diffuse_rs_backend::linear(h_sz, h_sz, &cfg.quantization_config, vb.pp("linear_2"))?;
+ diffusion_rs_backend::linear(h_sz, h_sz, &cfg.quantization_config, vb.pp("linear_2"))?;
Ok(Self {
in_layer,
out_layer,
@@ -175,7 +175,7 @@ impl MlpEmbedder {
}
}
-impl diffuse_rs_common::core::Module for MlpEmbedder {
+impl diffusion_rs_common::core::Module for MlpEmbedder {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
self.out_layer
.forward_autocast(&self.in_layer.forward_autocast(xs)?.silu()?)
@@ -234,7 +234,7 @@ struct Modulation1 {
impl Modulation1 {
fn new(dim: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> {
let lin =
- diffuse_rs_backend::linear(dim, 3 * dim, &cfg.quantization_config, vb.pp("linear"))?;
+ diffusion_rs_backend::linear(dim, 3 * dim, &cfg.quantization_config, vb.pp("linear"))?;
Ok(Self {
lin,
mod1: span!(tracing::Level::TRACE, "flux-mod1"),
@@ -249,7 +249,7 @@ impl Modulation1 {
.unsqueeze(1)?
.chunk(3, D::Minus1)?;
if ys.len() != 3 {
- diffuse_rs_common::bail!("unexpected len from chunk {ys:?}")
+ diffusion_rs_common::bail!("unexpected len from chunk {ys:?}")
}
Ok(ModulationOut {
shift: ys[0].clone(),
@@ -268,7 +268,7 @@ struct Modulation2 {
impl Modulation2 {
fn new(dim: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> {
let lin =
- diffuse_rs_backend::linear(dim, 6 * dim, &cfg.quantization_config, vb.pp("linear"))?;
+ diffusion_rs_backend::linear(dim, 6 * dim, &cfg.quantization_config, vb.pp("linear"))?;
Ok(Self {
lin,
mod2: span!(tracing::Level::TRACE, "flux-mod2"),
@@ -283,7 +283,7 @@ impl Modulation2 {
.unsqueeze(1)?
.chunk(6, D::Minus1)?;
if ys.len() != 6 {
- diffuse_rs_common::bail!("unexpected len from chunk {ys:?}")
+ diffusion_rs_common::bail!("unexpected len from chunk {ys:?}")
}
let mod1 = ModulationOut {
shift: ys[0].clone(),
@@ -322,21 +322,21 @@ impl SelfAttention {
) -> Result<Self> {
let head_dim = dim / num_attention_heads;
let (q, k, v, norm, proj) = if !context {
- let q = diffuse_rs_backend::linear_b(
+ let q = diffusion_rs_backend::linear_b(
dim,
dim,
qkv_bias,
&cfg.quantization_config,
vb.pp("to_q"),
)?;
- let k = diffuse_rs_backend::linear_b(
+ let k = diffusion_rs_backend::linear_b(
dim,
dim,
qkv_bias,
&cfg.quantization_config,
vb.pp("to_k"),
)?;
- let v = diffuse_rs_backend::linear_b(
+ let v = diffusion_rs_backend::linear_b(
dim,
dim,
qkv_bias,
@@ -344,26 +344,30 @@ impl SelfAttention {
vb.pp("to_v"),
)?;
let norm = QkNorm::new(head_dim, vb.pp("norm_q"), vb.pp("norm_k"))?;
- let proj =
- diffuse_rs_backend::linear(dim, dim, &cfg.quantization_config, vb.pp("to_out.0"))?;
+ let proj = diffusion_rs_backend::linear(
+ dim,
+ dim,
+ &cfg.quantization_config,
+ vb.pp("to_out.0"),
+ )?;
(q, k, v, norm, proj)
} else {
- let q = diffuse_rs_backend::linear_b(
+ let q = diffusion_rs_backend::linear_b(
dim,
dim,
qkv_bias,
&cfg.quantization_config,
vb.pp("add_q_proj"),
)?;
- let k = diffuse_rs_backend::linear_b(
+ let k = diffusion_rs_backend::linear_b(
dim,
dim,
qkv_bias,
&cfg.quantization_config,
vb.pp("add_k_proj"),
)?;
- let v = diffuse_rs_backend::linear_b(
+ let v = diffusion_rs_backend::linear_b(
dim,
dim,
qkv_bias,
@@ -371,7 +375,7 @@ impl SelfAttention {
vb.pp("add_v_proj"),
)?;
let norm = QkNorm::new(head_dim, vb.pp("norm_added_q"), vb.pp("norm_added_k"))?;
- let proj = diffuse_rs_backend::linear(
+ let proj = diffusion_rs_backend::linear(
dim,
dim,
&cfg.quantization_config,
@@ -440,8 +444,9 @@ struct Mlp {
impl Mlp {
fn new(in_sz: usize, mlp_sz: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> {
let lin1 =
- diffuse_rs_backend::linear(in_sz, mlp_sz, &cfg.quantization_config, vb.pp("0.proj"))?;
- let lin2 = diffuse_rs_backend::linear(mlp_sz, in_sz, &cfg.quantization_config, vb.pp("2"))?;
+ diffusion_rs_backend::linear(in_sz, mlp_sz, &cfg.quantization_config, vb.pp("0.proj"))?;
+ let lin2 =
+ diffusion_rs_backend::linear(mlp_sz, in_sz, &cfg.quantization_config, vb.pp("2"))?;
Ok(Self {
lin1,
lin2,
@@ -450,7 +455,7 @@ impl Mlp {
}
}
-impl diffuse_rs_common::core::Module for Mlp {
+impl diffusion_rs_common::core::Module for Mlp {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let _span = self.mlp.enter();
self.lin2
@@ -579,28 +584,28 @@ impl SingleStreamBlock {
let mlp_sz = (h_sz as f64 * MLP_RATIO) as usize;
let head_dim = h_sz / cfg.num_attention_heads;
- let q = diffuse_rs_backend::linear_b(
+ let q = diffusion_rs_backend::linear_b(
h_sz,
h_sz,
true,
&cfg.quantization_config,
vb.pp("attn.to_q"),
)?;
- let k = diffuse_rs_backend::linear_b(
+ let k = diffusion_rs_backend::linear_b(
h_sz,
h_sz,
true,
&cfg.quantization_config,
vb.pp("attn.to_k"),
)?;
- let v = diffuse_rs_backend::linear_b(
+ let v = diffusion_rs_backend::linear_b(
h_sz,
h_sz,
true,
&cfg.quantization_config,
vb.pp("attn.to_v"),
)?;
- let proj_mlp = diffuse_rs_backend::linear_b(
+ let proj_mlp = diffusion_rs_backend::linear_b(
h_sz,
mlp_sz,
true,
@@ -608,7 +613,7 @@ impl SingleStreamBlock {
vb.pp("proj_mlp"),
)?;
- let linear2 = diffuse_rs_backend::linear(
+ let linear2 = diffusion_rs_backend::linear(
h_sz + mlp_sz,
h_sz,
&cfg.quantization_config,
@@ -667,13 +672,13 @@ pub struct LastLayer {
impl LastLayer {
fn new(h_sz: usize, p_sz: usize, out_c: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> {
let norm_final = layer_norm(h_sz, vb.pp("norm_final"))?;
- let linear = diffuse_rs_backend::linear(
+ let linear = diffusion_rs_backend::linear(
h_sz,
p_sz * p_sz * out_c,
&cfg.quantization_config,
vb.pp("proj_out"),
)?;
- let ada_ln_modulation = diffuse_rs_backend::linear(
+ let ada_ln_modulation = diffusion_rs_backend::linear(
h_sz,
2 * h_sz,
&cfg.quantization_config,
@@ -715,13 +720,13 @@ pub struct Flux {
impl Flux {
pub fn new(cfg: &Config, vb: VarBuilder) -> Result<Self> {
- let img_in = diffuse_rs_backend::linear(
+ let img_in = diffusion_rs_backend::linear(
cfg.in_channels,
HIDDEN_SIZE,
&cfg.quantization_config,
vb.pp("x_embedder"),
)?;
- let txt_in = diffuse_rs_backend::linear(
+ let txt_in = diffusion_rs_backend::linear(
cfg.joint_attention_dim,
HIDDEN_SIZE,
&cfg.quantization_config,
@@ -793,10 +798,10 @@ impl Flux {
guidance: Option<&Tensor>,
) -> Result<Tensor> {
if txt.rank() != 3 {
- diffuse_rs_common::bail!("unexpected shape for txt {:?}", txt.shape())
+ diffusion_rs_common::bail!("unexpected shape for txt {:?}", txt.shape())
}
if img.rank() != 3 {
- diffuse_rs_common::bail!("unexpected shape for img {:?}", img.shape())
+ diffusion_rs_common::bail!("unexpected shape for img {:?}", img.shape())
}
let dtype = img.dtype();
let pe = {
diff --git a/diffuse_rs_core/src/models/mod.rs b/diffusion_rs_core/src/models/mod.rs
similarity index 93%
rename from diffuse_rs_core/src/models/mod.rs
rename to diffusion_rs_core/src/models/mod.rs
index 7752367..d9e173a 100644
--- a/diffuse_rs_core/src/models/mod.rs
+++ b/diffusion_rs_core/src/models/mod.rs
@@ -6,8 +6,8 @@ mod vaes;
use std::sync::Arc;
pub use clip::{ClipTextConfig, ClipTextTransformer};
-use diffuse_rs_backend::QuantMethod;
-use diffuse_rs_common::core::{Device, Result};
+use diffusion_rs_backend::QuantMethod;
+use diffusion_rs_common::core::{Device, Result};
pub use flux::{FluxConfig, FluxModel};
pub use t5::{T5Config, T5EncoderModel};
diff --git a/diffuse_rs_core/src/models/t5/mod.rs b/diffusion_rs_core/src/models/t5/mod.rs
similarity index 97%
rename from diffuse_rs_core/src/models/t5/mod.rs
rename to diffusion_rs_core/src/models/t5/mod.rs
index cbd5602..09f1147 100644
--- a/diffuse_rs_core/src/models/t5/mod.rs
+++ b/diffusion_rs_core/src/models/t5/mod.rs
@@ -3,10 +3,10 @@
// T5 Text Model
// https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py
-use diffuse_rs_backend::{linear_no_bias, QuantMethod, QuantizedConfig};
-use diffuse_rs_common::core::{DType, Device, Module, Result, Tensor, D};
-use diffuse_rs_common::nn::{Activation, Embedding};
-use diffuse_rs_common::{embedding, VarBuilder};
+use diffusion_rs_backend::{linear_no_bias, QuantMethod, QuantizedConfig};
+use diffusion_rs_common::core::{DType, Device, Module, Result, Tensor, D};
+use diffusion_rs_common::nn::{Activation, Embedding};
+use diffusion_rs_common::{embedding, VarBuilder};
use serde::Deserialize;
use std::sync::Arc;
@@ -41,7 +41,7 @@ fn masked_fill(on_false: &Tensor, mask: &Tensor, on_true: f32) -> Result<Tensor>
#[derive(Debug, Deserialize, Default, Clone, PartialEq)]
pub struct ActivationWithOptionalGating {
gated: bool,
- activation: diffuse_rs_common::nn::Activation,
+ activation: diffusion_rs_common::nn::Activation,
}
fn deserialize_feed_forward_proj_activation<'de, D>(
@@ -53,11 +53,11 @@ where
match String::deserialize(deserializer)?.as_str() {
"gated-gelu" => Ok(ActivationWithOptionalGating {
gated: true,
- activation: diffuse_rs_common::nn::Activation::NewGelu,
+ activation: diffusion_rs_common::nn::Activation::NewGelu,
}),
"gated-silu" => Ok(ActivationWithOptionalGating {
gated: true,
- activation: diffuse_rs_common::nn::Activation::Silu,
+ activation: diffusion_rs_common::nn::Activation::Silu,
}),
buf => {
let activation = serde_plain::from_str(buf).map_err(serde::de::Error::custom)?;
@@ -387,7 +387,7 @@ impl T5Attention {
},
};
- let attn_weights = { diffuse_rs_common::nn::ops::softmax_last_dim(&scores)? };
+ let attn_weights = { diffusion_rs_common::nn::ops::softmax_last_dim(&scores)? };
let attn_output = attn_weights.matmul(&v)?;
let attn_output = attn_output
.transpose(1, 2)?
diff --git a/diffuse_rs_core/src/models/vaes/autoencoder_kl.rs b/diffusion_rs_core/src/models/vaes/autoencoder_kl.rs
similarity index 93%
rename from diffuse_rs_core/src/models/vaes/autoencoder_kl.rs
rename to diffusion_rs_core/src/models/vaes/autoencoder_kl.rs
index 736bae0..01a0330 100644
--- a/diffuse_rs_core/src/models/vaes/autoencoder_kl.rs
+++ b/diffusion_rs_core/src/models/vaes/autoencoder_kl.rs
@@ -1,6 +1,6 @@
-use diffuse_rs_common::core::{Result, Tensor};
-use diffuse_rs_common::nn::{Activation, Conv2d, Conv2dConfig};
-use diffuse_rs_common::VarBuilder;
+use diffusion_rs_common::core::{Result, Tensor};
+use diffusion_rs_common::nn::{Activation, Conv2d, Conv2dConfig};
+use diffusion_rs_common::VarBuilder;
use serde::Deserialize;
use super::{
@@ -65,7 +65,7 @@ impl AutoEncoderKl {
let decoder = Decoder::new(&cfg.clone().into(), vb.pp("decoder"))?;
let reg = DiagonalGaussian::new(true, 1)?;
let quant_conv = if cfg.use_quant_conv {
- Some(diffuse_rs_common::conv2d(
+ Some(diffusion_rs_common::conv2d(
2 * cfg.latent_channels,
2 * cfg.latent_channels,
1,
@@ -76,7 +76,7 @@ impl AutoEncoderKl {
None
};
let post_quant_conv = if cfg.use_post_quant_conv {
- Some(diffuse_rs_common::conv2d(
+ Some(diffusion_rs_common::conv2d(
cfg.latent_channels,
cfg.latent_channels,
1,
diff --git a/diffuse_rs_core/src/models/vaes/mod.rs b/diffusion_rs_core/src/models/vaes/mod.rs
similarity index 94%
rename from diffuse_rs_core/src/models/vaes/mod.rs
rename to diffusion_rs_core/src/models/vaes/mod.rs
index df16ffc..1a8c3d0 100644
--- a/diffuse_rs_core/src/models/vaes/mod.rs
+++ b/diffusion_rs_core/src/models/vaes/mod.rs
@@ -1,13 +1,13 @@
use std::sync::Arc;
use autoencoder_kl::{AutencoderKlConfig, AutoEncoderKl};
-use diffuse_rs_common::{
+use diffusion_rs_common::{
core::{Device, Result, Tensor},
ModelSource,
};
use serde::Deserialize;
-use diffuse_rs_common::{from_mmaped_safetensors, FileData, VarBuilder};
+use diffusion_rs_common::{from_mmaped_safetensors, FileData, VarBuilder};
mod autoencoder_kl;
mod vae;
diff --git a/diffuse_rs_core/src/models/vaes/vae.rs b/diffusion_rs_core/src/models/vaes/vae.rs
similarity index 88%
rename from diffuse_rs_core/src/models/vaes/vae.rs
rename to diffusion_rs_core/src/models/vaes/vae.rs
index 231a16e..84d1035 100644
--- a/diffuse_rs_core/src/models/vaes/vae.rs
+++ b/diffusion_rs_core/src/models/vaes/vae.rs
@@ -1,8 +1,8 @@
#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
-use diffuse_rs_common::core::{Result, Tensor, D};
-use diffuse_rs_common::nn::{Activation, Conv2d, Conv2dConfig, GroupNorm};
-use diffuse_rs_common::{conv2d, group_norm, linear, VarBuilder};
+use diffusion_rs_common::core::{Result, Tensor, D};
+use diffusion_rs_common::nn::{Activation, Conv2d, Conv2dConfig, GroupNorm};
+use diffusion_rs_common::{conv2d, group_norm, linear, VarBuilder};
use serde::Deserialize;
use tracing::{span, Span};
@@ -29,7 +29,7 @@ fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Tensor>
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let _span = self.attn.enter();
let init_xs = xs;
@@ -123,7 +123,7 @@ struct ResnetBlock {
impl ResnetBlock {
fn new(in_c: usize, out_c: usize, vb: VarBuilder, cfg: &VAEConfig) -> Result<Self> {
- let conv_cfg = diffuse_rs_common::nn::Conv2dConfig {
+ let conv_cfg = diffusion_rs_common::nn::Conv2dConfig {
padding: 1,
..Default::default()
};
@@ -154,7 +154,7 @@ impl ResnetBlock {
}
}
-impl diffuse_rs_common::core::Module for ResnetBlock {
+impl diffusion_rs_common::core::Module for ResnetBlock {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let _span = self.resnet.enter();
let h = xs
@@ -179,7 +179,7 @@ struct Downsample {
impl Downsample {
fn new(in_c: usize, vb: VarBuilder) -> Result<Self> {
- let conv_cfg = diffuse_rs_common::nn::Conv2dConfig {
+ let conv_cfg = diffusion_rs_common::nn::Conv2dConfig {
stride: 2,
..Default::default()
};
@@ -191,7 +191,7 @@ impl Downsample {
}
}
-impl diffuse_rs_common::core::Module for Downsample {
+impl diffusion_rs_common::core::Module for Downsample {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let _span = self.downsample.enter();
let xs = xs.pad_with_zeros(D::Minus1, 0, 1)?;
@@ -208,7 +208,7 @@ struct Upsample {
impl Upsample {
fn new(in_c: usize, vb: VarBuilder) -> Result<Self> {
- let conv_cfg = diffuse_rs_common::nn::Conv2dConfig {
+ let conv_cfg = diffusion_rs_common::nn::Conv2dConfig {
padding: 1,
..Default::default()
};
@@ -220,7 +220,7 @@ impl Upsample {
}
}
-impl diffuse_rs_common::core::Module for Upsample {
+impl diffusion_rs_common::core::Module for Upsample {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let _ = self.upsample.enter();
let (_, _, h, w) = xs.dims4()?;
@@ -253,9 +253,11 @@ impl Encoder {
.iter()
.all(|x| x == "DownEncoderBlock2D")
{
- diffuse_rs_common::bail!("All down (encoder) block types must be `DownEncoderBlock2D`");
+ diffusion_rs_common::bail!(
+ "All down (encoder) block types must be `DownEncoderBlock2D`"
+ );
}
- let conv_cfg = diffuse_rs_common::nn::Conv2dConfig {
+ let conv_cfg = diffusion_rs_common::nn::Conv2dConfig {
padding: 1,
..Default::default()
};
@@ -291,8 +293,8 @@ impl Encoder {
// TODO: this is technically not general enough. Should always start with 1 resnet, then unet num_layers (defaults to 1 so this is OK)
// repeats of attention and resnet!
- // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L644-L729
- // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L625
+ // https://github.com/huggingface/diffusers/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusers/models/unets/unet_2d_blocks.py#L644-L729
+ // https://github.com/huggingface/diffusers/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusers/models/unets/unet_2d_blocks.py#L625
let mid_block_1 = ResnetBlock::new(block_in, block_in, vb.pp("mid_block.resnets.0"), cfg)?;
let mid_attn_1 = if cfg.mid_block_add_attention {
Some(AttnBlock::new(
@@ -325,7 +327,7 @@ impl Encoder {
}
}
-impl diffuse_rs_common::nn::Module for Encoder {
+impl diffusion_rs_common::nn::Module for Encoder {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let mut h = xs.apply(&self.conv_in)?;
for block in self.down.iter() {
@@ -368,9 +370,9 @@ pub struct Decoder {
impl Decoder {
pub fn new(cfg: &VAEConfig, vb: VarBuilder) -> Result<Self> {
if !cfg.up_block_types.iter().all(|x| x == "UpDecoderBlock2D") {
- diffuse_rs_common::bail!("All up (decoder) block types must be `UpDecoderBlock2D`");
+ diffusion_rs_common::bail!("All up (decoder) block types must be `UpDecoderBlock2D`");
}
- let conv_cfg = diffuse_rs_common::nn::Conv2dConfig {
+ let conv_cfg = diffusion_rs_common::nn::Conv2dConfig {
padding: 1,
..Default::default()
};
@@ -379,8 +381,8 @@ impl Decoder {
// TODO: this is technically not general enough. Should always start with 1 resnet, then unet num_layers (defaults to 1 so this is OK)
// repeats of attention and resnet!
- // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L644-L729
- // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L625
+ // https://github.com/huggingface/diffusers/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusers/models/unets/unet_2d_blocks.py#L644-L729
+ // https://github.com/huggingface/diffusers/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusers/models/unets/unet_2d_blocks.py#L625
let mid_block_1 = ResnetBlock::new(block_in, block_in, vb.pp("mid_block.resnets.0"), cfg)?;
let mid_attn_1 = if cfg.mid_block_add_attention {
Some(AttnBlock::new(
@@ -431,7 +433,7 @@ impl Decoder {
}
}
-impl diffuse_rs_common::nn::Module for Decoder {
+impl diffusion_rs_common::nn::Module for Decoder {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let h = xs.apply(&self.conv_in)?;
let mut h = h.apply(&self.mid_block_1)?;
@@ -465,7 +467,7 @@ impl DiagonalGaussian {
}
}
-impl diffuse_rs_common::nn::Module for DiagonalGaussian {
+impl diffusion_rs_common::nn::Module for DiagonalGaussian {
fn forward(&self, xs: &Tensor) -> Result<Tensor> {
let chunks = xs.chunk(2, self.chunk_dim)?;
if self.sample {
diff --git a/diffuse_rs_core/src/pipelines/flux/mod.rs b/diffusion_rs_core/src/pipelines/flux/mod.rs
similarity index 93%
rename from diffuse_rs_core/src/pipelines/flux/mod.rs
rename to diffusion_rs_core/src/pipelines/flux/mod.rs
index 48e0a9d..1fecdaf 100644
--- a/diffuse_rs_core/src/pipelines/flux/mod.rs
+++ b/diffusion_rs_core/src/pipelines/flux/mod.rs
@@ -2,8 +2,8 @@ use std::sync::Mutex;
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
use anyhow::Result;
-use diffuse_rs_common::core::{DType, Device, Tensor, D};
-use diffuse_rs_common::nn::Module;
+use diffusion_rs_common::core::{DType, Device, Tensor, D};
+use diffusion_rs_common::nn::Module;
use tokenizers::Tokenizer;
use tracing::info;
@@ -15,7 +15,7 @@ use crate::{
},
pipelines::ComponentName,
};
-use diffuse_rs_common::{from_mmaped_safetensors, ModelSource};
+use diffusion_rs_common::{from_mmaped_safetensors, ModelSource};
use super::sampling::Sampler;
use super::scheduler::SchedulerConfig;
@@ -74,7 +74,7 @@ impl Loader for FluxLoader {
let vocab_file = &files["tokenizer/vocab.json"];
let merges_file = &files["tokenizer/merges.txt"];
- diffuse_rs_common::load_bpe_tokenizer(vocab_file, merges_file, &source)?
+ diffusion_rs_common::load_bpe_tokenizer(vocab_file, merges_file, &source)?
} else {
anyhow::bail!("incorrect storage of clip tokenizer")
};
@@ -201,11 +201,11 @@ impl FluxPipeline {
fn tokenize_and_pad(
prompts: Vec<String>,
tokenizer: &Tokenizer,
- ) -> diffuse_rs_common::core::Result<Vec<Vec<u32>>> {
+ ) -> diffusion_rs_common::core::Result<Vec<Vec<u32>>> {
let mut t5_tokens = Vec::new();
let unpadded_t5_tokens = tokenizer
.encode_batch(prompts, true)
- .map_err(|e| diffuse_rs_common::core::Error::Msg(e.to_string()))?
+ .map_err(|e| diffusion_rs_common::core::Error::Msg(e.to_string()))?
.into_iter()
.map(|e| e.get_ids().to_vec())
.collect::<Vec<_>>();
@@ -225,7 +225,7 @@ impl ModelPipeline for FluxPipeline {
prompts: Vec<String>,
params: DiffusionGenerationParams,
offloading_type: Option<Offloading>,
- ) -> diffuse_rs_common::core::Result<Tensor> {
+ ) -> diffusion_rs_common::core::Result<Tensor> {
match offloading_type {
Some(Offloading::Full) => {
self.t5_model.to_device(&self.device)?;
@@ -241,7 +241,7 @@ impl ModelPipeline for FluxPipeline {
if !self.flux_model.is_guidance() {
match t5_input_ids.dim(1)?.cmp(&256) {
Ordering::Greater => {
- diffuse_rs_common::bail!("T5 embedding length greater than 256, please shrink the prompt or use the -dev (with guidance distillation) version.")
+ diffusion_rs_common::bail!("T5 embedding length greater than 256, please shrink the prompt or use the -dev (with guidance distillation) version.")
}
Ordering::Less | Ordering::Equal => {
t5_input_ids =
@@ -300,7 +300,7 @@ impl ModelPipeline for FluxPipeline {
} else {
None
};
- let step = |img: &Tensor, t_vec: &Tensor| -> diffuse_rs_common::core::Result<Tensor> {
+ let step = |img: &Tensor, t_vec: &Tensor| -> diffusion_rs_common::core::Result<Tensor> {
self.flux_model.forward(
img,
&state.img_ids,
diff --git a/diffuse_rs_core/src/pipelines/flux/sampling.rs b/diffusion_rs_core/src/pipelines/flux/sampling.rs
similarity index 97%
rename from diffuse_rs_core/src/pipelines/flux/sampling.rs
rename to diffusion_rs_core/src/pipelines/flux/sampling.rs
index 78f1bf1..7191d4b 100644
--- a/diffuse_rs_core/src/pipelines/flux/sampling.rs
+++ b/diffusion_rs_core/src/pipelines/flux/sampling.rs
@@ -1,6 +1,6 @@
#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
-use diffuse_rs_common::core::{Device, Result, Tensor};
+use diffusion_rs_common::core::{Device, Result, Tensor};
pub fn get_noise(
num_samples: usize,
diff --git a/diffuse_rs_core/src/pipelines/mod.rs b/diffusion_rs_core/src/pipelines/mod.rs
similarity index 97%
rename from diffuse_rs_core/src/pipelines/mod.rs
rename to diffusion_rs_core/src/pipelines/mod.rs
index 56ad98b..ed5073b 100644
--- a/diffuse_rs_core/src/pipelines/mod.rs
+++ b/diffusion_rs_core/src/pipelines/mod.rs
@@ -9,12 +9,12 @@ use std::{
};
use anyhow::Result;
-use diffuse_rs_common::core::{Device, Tensor};
+use diffusion_rs_common::core::{Device, Tensor};
use flux::FluxLoader;
use image::{DynamicImage, RgbImage};
use serde::Deserialize;
-use diffuse_rs_common::{FileData, FileLoader, ModelSource, NiceProgressBar, TokenSource};
+use diffusion_rs_common::{FileData, FileLoader, ModelSource, NiceProgressBar, TokenSource};
use tracing::info;
/// Generation parameters.
@@ -94,7 +94,7 @@ pub trait ModelPipeline: Send + Sync {
prompts: Vec<String>,
params: DiffusionGenerationParams,
offloading_type: Option<Offloading>,
- ) -> diffuse_rs_common::core::Result<Tensor>;
+ ) -> diffusion_rs_common::core::Result<Tensor>;
}
#[derive(Clone, Debug, Deserialize)]
@@ -251,7 +251,7 @@ impl Pipeline {
#[allow(clippy::cast_possible_truncation)]
images.push(DynamicImage::ImageRgb8(
RgbImage::from_raw(w as u32, h as u32, flattened.to_vec1::<u8>()?).ok_or(
- diffuse_rs_common::core::Error::Msg(
+ diffusion_rs_common::core::Error::Msg(
"RgbImage has invalid capacity.".to_string(),
),
)?,
diff --git a/diffuse_rs_core/src/pipelines/sampling.rs b/diffusion_rs_core/src/pipelines/sampling.rs
similarity index 98%
rename from diffuse_rs_core/src/pipelines/sampling.rs
rename to diffusion_rs_core/src/pipelines/sampling.rs
index 91328b1..24f5bc3 100644
--- a/diffuse_rs_core/src/pipelines/sampling.rs
+++ b/diffusion_rs_core/src/pipelines/sampling.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::{
+use diffusion_rs_common::{
core::{Result, Tensor},
NiceProgressBar,
};
diff --git a/diffuse_rs_core/src/pipelines/scheduler.rs b/diffusion_rs_core/src/pipelines/scheduler.rs
similarity index 96%
rename from diffuse_rs_core/src/pipelines/scheduler.rs
rename to diffusion_rs_core/src/pipelines/scheduler.rs
index 58fb00f..ce1e5f0 100644
--- a/diffuse_rs_core/src/pipelines/scheduler.rs
+++ b/diffusion_rs_core/src/pipelines/scheduler.rs
@@ -1,4 +1,4 @@
-use diffuse_rs_common::core::{Context, Result};
+use diffusion_rs_common::core::{Context, Result};
use serde::Deserialize;
#[derive(Deserialize, Clone)]
diff --git a/diffuse_rs_examples/Cargo.toml b/diffusion_rs_examples/Cargo.toml
similarity index 74%
rename from diffuse_rs_examples/Cargo.toml
rename to diffusion_rs_examples/Cargo.toml
index c9cb455..2455d60 100644
--- a/diffuse_rs_examples/Cargo.toml
+++ b/diffusion_rs_examples/Cargo.toml
@@ -1,10 +1,10 @@
[package]
-name = "diffuse_rs_examples"
+name = "diffusion_rs_examples"
readme.workspace = true
authors.workspace = true
version.workspace = true
edition.workspace = true
-description = "Examples of diffuse_rs"
+description = "Examples of diffusion_rs"
repository.workspace = true
keywords.workspace = true
categories.workspace = true
@@ -12,7 +12,7 @@ license.workspace = true
homepage.workspace = true
[dependencies]
-diffuse_rs_core = { path = "../diffuse_rs_core" }
+diffusion_rs_core = { path = "../diffusion_rs_core" }
anyhow.workspace = true
clap.workspace = true
tracing.workspace = true
diff --git a/diffuse_rs_examples/examples/dduf/README.md b/diffusion_rs_examples/examples/dduf/README.md
similarity index 100%
rename from diffuse_rs_examples/examples/dduf/README.md
rename to diffusion_rs_examples/examples/dduf/README.md
diff --git a/diffuse_rs_examples/examples/dduf/main.rs b/diffusion_rs_examples/examples/dduf/main.rs
similarity index 92%
rename from diffuse_rs_examples/examples/dduf/main.rs
rename to diffusion_rs_examples/examples/dduf/main.rs
index 3b26431..431c753 100644
--- a/diffuse_rs_examples/examples/dduf/main.rs
+++ b/diffusion_rs_examples/examples/dduf/main.rs
@@ -1,7 +1,9 @@
use std::time::Instant;
use clap::Parser;
-use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
+use diffusion_rs_core::{
+ DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource,
+};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::EnvFilter;
diff --git a/diffuse_rs_examples/examples/flux/README.md b/diffusion_rs_examples/examples/flux/README.md
similarity index 100%
rename from diffuse_rs_examples/examples/flux/README.md
rename to diffusion_rs_examples/examples/flux/README.md
diff --git a/diffuse_rs_examples/examples/flux/main.rs b/diffusion_rs_examples/examples/flux/main.rs
similarity index 94%
rename from diffuse_rs_examples/examples/flux/main.rs
rename to diffusion_rs_examples/examples/flux/main.rs
index fd173d0..81f1665 100644
--- a/diffuse_rs_examples/examples/flux/main.rs
+++ b/diffusion_rs_examples/examples/flux/main.rs
@@ -1,6 +1,8 @@
use std::time::Instant;
-use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};
+use diffusion_rs_core::{
+ DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource,
+};
use clap::{Parser, ValueEnum};
use tracing::level_filters::LevelFilter;
diff --git a/diffuse_rs_py/Cargo.toml b/diffusion_rs_py/Cargo.toml
similarity index 61%
rename from diffuse_rs_py/Cargo.toml
rename to diffusion_rs_py/Cargo.toml
index 0404a84..4006933 100644
--- a/diffuse_rs_py/Cargo.toml
+++ b/diffusion_rs_py/Cargo.toml
@@ -1,5 +1,5 @@
[package]
-name = "diffuse_rs_py"
+name = "diffusion_rs_py"
readme.workspace = true
authors.workspace = true
version.workspace = true
@@ -12,13 +12,13 @@ license.workspace = true
homepage.workspace = true
[lib]
-name = "diffuse_rs"
+name = "diffusion_rs"
crate-type = ["cdylib"]
doc = false
[dependencies]
pyo3.workspace = true
-diffuse_rs_core = { path = "../diffuse_rs_core" }
+diffusion_rs_core = { path = "../diffusion_rs_core" }
anyhow.workspace = true
image.workspace = true
@@ -26,8 +26,8 @@ image.workspace = true
pyo3-build-config = "0.23"
[features]
-cuda = ["diffuse_rs_core/cuda"]
-cudnn = ["diffuse_rs_core/cudnn"]
-metal = ["diffuse_rs_core/metal"]
-accelerate = ["diffuse_rs_core/accelerate"]
-mkl = ["diffuse_rs_core/mkl"]
+cuda = ["diffusion_rs_core/cuda"]
+cudnn = ["diffusion_rs_core/cudnn"]
+metal = ["diffusion_rs_core/metal"]
+accelerate = ["diffusion_rs_core/accelerate"]
+mkl = ["diffusion_rs_core/mkl"]
diff --git a/diffuse_rs_py/build.rs b/diffusion_rs_py/build.rs
similarity index 100%
rename from diffuse_rs_py/build.rs
rename to diffusion_rs_py/build.rs
diff --git a/diffuse_rs_py/diffuse_rs.pyi b/diffusion_rs_py/diffuse_rs.pyi
similarity index 100%
rename from diffuse_rs_py/diffuse_rs.pyi
rename to diffusion_rs_py/diffuse_rs.pyi
diff --git a/diffuse_rs_py/examples/dduf.py b/diffusion_rs_py/examples/dduf.py
similarity index 82%
rename from diffuse_rs_py/examples/dduf.py
rename to diffusion_rs_py/examples/dduf.py
index 8033ef6..2f9113e 100644
--- a/diffuse_rs_py/examples/dduf.py
+++ b/diffusion_rs_py/examples/dduf.py
@@ -1,4 +1,4 @@
-from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline
+from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline
from PIL import Image
import io
diff --git a/diffuse_rs_py/examples/flux.py b/diffusion_rs_py/examples/flux.py
similarity index 83%
rename from diffuse_rs_py/examples/flux.py
rename to diffusion_rs_py/examples/flux.py
index 42f1b4a..ab27892 100644
--- a/diffuse_rs_py/examples/flux.py
+++ b/diffusion_rs_py/examples/flux.py
@@ -1,4 +1,4 @@
-from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline
+from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline
from PIL import Image
import io
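The two examples above change only their import line. For orientation, here is a minimal end-to-end sketch against the renamed diffusion_rs module; the Pipeline constructor call and the bytes-based return value are assumptions inferred from the diffusion_rs_py/src/lib.rs hunk further below, not confirmed by this patch, and the model id is a hypothetical placeholder.

    from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline
    from PIL import Image
    import io

    # ModelSource.ModelId and ModelSource.DdufFile are the two variants visible
    # in this patch; the model id string is a hypothetical placeholder.
    source = ModelSource.ModelId(model_id="black-forest-labs/FLUX.1-dev")
    pipeline = Pipeline(source)  # assumed: token/revision/offloading default to None

    # height/width/num_steps are the fields visible in the lib.rs hunk below.
    images = pipeline.forward(
        prompts=["A photo of a cat"],
        params=DiffusionGenerationParams(height=512, width=512, num_steps=50),
    )
    # Assumption: forward returns encoded image bytes, matching the PIL/io
    # imports used by the bundled examples.
    Image.open(io.BytesIO(images[0])).save("out.png")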
diff --git a/diffusion_rs_py/generate_wheels.sh b/diffusion_rs_py/generate_wheels.sh
new file mode 100644
index 0000000..56c21af
--- /dev/null
+++ b/diffusion_rs_py/generate_wheels.sh
@@ -0,0 +1,64 @@
+###################################
+### UPLOADING
+###################################
+
+# ⚠️⚠️⚠️⚠️ Be sure to update the `project.name` field in `pyproject.toml`!! ⚠️⚠️⚠️⚠️
+# diffusion_rs, diffusion_rs_cuda, diffusion_rs_metal, diffusion_rs_mkl, diffusion_rs_accelerate
+
+## testpypi:
+# twine upload --repository testpypi --password PASSWORD --username __token__ wheels-NAME/*.whl
+
+
+## pypi:
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cuda/*.whl
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-mkl/*.whl
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-accelerate/*.whl
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-metal/*.whl
+# ⚠️ Need both x86_64 and aarch64 builds before this! ⚠️
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cpu/*.whl
+
+
+###################################
+#### MAC: Aarch64 Manylinux and OSX
+###################################
+
+docker build -t wheelmaker:latest -f Dockerfile.manylinux .
+docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.10
+docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.11
+docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.12
+
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.10
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.11
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.12
+
+# Metal
+
+maturin build -o wheels-metal -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features metal
+maturin build -o wheels-metal -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features metal
+maturin build -o wheels-metal -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features metal
+
+# Accelerate
+
+maturin build -o wheels-accelerate -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features accelerate
+maturin build -o wheels-accelerate -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features accelerate
+maturin build -o wheels-accelerate -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features accelerate
+
+####################################
+# WINDOWS: x86_64 Manylinux, Windows
+####################################
+
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.10
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.11
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.12
+
+# CUDA
+
+maturin build -o wheels-cuda -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features cuda
+maturin build -o wheels-cuda -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features cuda
+maturin build -o wheels-cuda -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features cuda
+
+# MKL
+
+maturin build -o wheels-mkl -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features mkl
+maturin build -o wheels-mkl -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features mkl
+maturin build -o wheels-mkl -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features mkl
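The script above spells out one maturin invocation per interpreter and per feature set. A hypothetical Python driver for the same build matrix, sketched under the assumption that maturin is on PATH and run from the repository root (this is not part of the patch):

    import subprocess

    # Build matrix mirrored from generate_wheels.sh: output directory -> feature.
    VARIANTS = {
        "wheels-cpu": None,
        "wheels-metal": "metal",
        "wheels-accelerate": "accelerate",
        "wheels-cuda": "cuda",
        "wheels-mkl": "mkl",
    }
    INTERPRETERS = ["python3.10", "python3.11", "python3.12"]

    for out_dir, feature in VARIANTS.items():
        for interpreter in INTERPRETERS:
            cmd = [
                "maturin", "build",
                "-o", out_dir,
                "-m", "diffusion_rs_py/Cargo.toml",
                "--interpreter", interpreter,
            ]
            if feature is not None:
                cmd += ["--features", feature]
            subprocess.run(cmd, check=True)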
diff --git a/diffuse_rs_py/pyproject.toml b/diffusion_rs_py/pyproject.toml
similarity index 92%
rename from diffuse_rs_py/pyproject.toml
rename to diffusion_rs_py/pyproject.toml
index 2e2c6ca..458884c 100644
--- a/diffuse_rs_py/pyproject.toml
+++ b/diffusion_rs_py/pyproject.toml
@@ -3,8 +3,8 @@ requires = ["maturin==1.7"]
build-backend = "maturin"
[project]
-name = "diffuse_rs"
-version = "0.1.3"
+name = "diffusion_rs"
+version = "0.1.0"
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Rust",
diff --git a/diffuse_rs_py/src/lib.rs b/diffusion_rs_py/src/lib.rs
similarity index 83%
rename from diffuse_rs_py/src/lib.rs
rename to diffusion_rs_py/src/lib.rs
index a885f44..2e9f039 100644
--- a/diffuse_rs_py/src/lib.rs
+++ b/diffusion_rs_py/src/lib.rs
@@ -66,7 +66,7 @@ impl DiffusionGenerationParams {
}
#[pyclass]
-pub struct Pipeline(diffuse_rs_core::Pipeline);
+pub struct Pipeline(diffusion_rs_core::Pipeline);
#[pymethods]
impl Pipeline {
@@ -86,21 +86,21 @@ impl Pipeline {
offloading: Option<Offloading>,
) -> PyResult<Self> {
let token = token
- .map(diffuse_rs_core::TokenSource::Literal)
- .unwrap_or(diffuse_rs_core::TokenSource::CacheToken);
+ .map(diffusion_rs_core::TokenSource::Literal)
+ .unwrap_or(diffusion_rs_core::TokenSource::CacheToken);
let source = match source {
ModelSource::DdufFile { file } => {
- diffuse_rs_core::ModelSource::dduf(file).map_err(wrap_anyhow_error)?
+ diffusion_rs_core::ModelSource::dduf(file).map_err(wrap_anyhow_error)?
}
ModelSource::ModelId { model_id } => {
- diffuse_rs_core::ModelSource::from_model_id(model_id)
+ diffusion_rs_core::ModelSource::from_model_id(model_id)
}
};
let offloading = offloading.map(|offloading| match offloading {
- Offloading::Full => diffuse_rs_core::Offloading::Full,
+ Offloading::Full => diffusion_rs_core::Offloading::Full,
});
Ok(Self(
- diffuse_rs_core::Pipeline::load(source, silent, token, revision, offloading)
+ diffusion_rs_core::Pipeline::load(source, silent, token, revision, offloading)
.map_err(wrap_anyhow_error)?,
))
}
@@ -114,7 +114,7 @@ impl Pipeline {
.0
.forward(
prompts,
- diffuse_rs_core::DiffusionGenerationParams {
+ diffusion_rs_core::DiffusionGenerationParams {
height: params.height,
width: params.width,
num_steps: params.num_steps,
@@ -138,7 +138,7 @@ impl Pipeline {
}
#[pymodule]
-fn diffuse_rs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn diffusion_rs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<ModelSource>()?;
m.add_class::<DiffusionGenerationParams>()?;
m.add_class::<Pipeline>()?;
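Since the #[pymodule] symbol itself is renamed from diffuse_rs to diffusion_rs, Python code importing the old name fails at import time once the new wheel is installed. A hypothetical compatibility shim for code that must span the rename, sketched here and not part of this patch:

    # Prefer the renamed module; fall back to pre-rename wheels if present.
    try:
        import diffusion_rs as _dr
    except ImportError:
        import diffuse_rs as _dr  # old module name from diffuse-rs wheels

    Pipeline = _dr.Pipeline
    ModelSource = _dr.ModelSource
    DiffusionGenerationParams = _dr.DiffusionGenerationParams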