From 34a8c8f94468ed4b117f501ef7220f437274b6af Mon Sep 17 00:00:00 2001 From: Eric Buehler <65165915+EricLBuehler@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:11:54 -0500 Subject: [PATCH] chore: diffuse-rs -> diffusion-rs (#26) * Rename diffuse-rs to diffusion-rs * Format * Don't run the code analysis * Reset version --- .github/workflows/analysis.yaml | 64 -------- .github/workflows/docs.yaml | 6 +- .typos.toml | 8 +- Cargo.lock | 36 ++--- Cargo.toml | 18 +-- FEATURE_FLAGS.md | 2 +- INSTALL.md | 52 +++---- README.md | 20 +-- diffuse_rs_backend/README.md | 3 - diffuse_rs_cli/README.md | 22 --- diffuse_rs_common/README.md | 3 - diffuse_rs_py/generate_wheels.sh | 64 -------- .../Cargo.toml | 8 +- diffusion_rs_backend/README.md | 3 + .../build.rs | 0 .../kernels/bitsandbytes/dequant.cu | 0 .../src/bitsandbytes/ffi.rs | 2 +- .../src/bitsandbytes/mod.rs | 34 ++--- .../src/bitsandbytes/op.rs | 144 +++++++++--------- .../src/cublaslt/api.rs | 134 ++++++++-------- .../src/cublaslt/matmul.rs | 8 +- .../src/cublaslt/mod.rs | 10 +- .../src/gguf/mod.rs | 6 +- .../src/lib.rs | 12 +- .../src/metal_kernels/bnb_dequantize.metal | 0 .../src/metal_kernels/mod.rs | 2 +- .../src/metal_kernels/sdpa.metal | 0 .../src/metal_kernels/utils.rs | 0 .../src/ops.rs | 44 +++--- .../src/unquantized/mod.rs | 4 +- .../Cargo.toml | 16 +- diffusion_rs_cli/README.md | 22 +++ .../src/main.rs | 4 +- .../Cargo.toml | 2 +- diffusion_rs_common/README.md | 3 + .../build.rs | 0 .../src/core/LICENSE | 0 .../src/core/accelerate.rs | 0 .../src/core/backend.rs | 0 .../src/core/backprop.rs | 0 .../src/core/conv.rs | 0 .../src/core/convert.rs | 0 .../src/core/cpu/avx.rs | 0 .../src/core/cpu/erf.rs | 0 .../src/core/cpu/kernels.rs | 0 .../src/core/cpu/mod.rs | 0 .../src/core/cpu/neon.rs | 0 .../src/core/cpu/simd128.rs | 0 .../src/core/cpu_backend/mod.rs | 0 .../src/core/cpu_backend/utils.rs | 0 .../src/core/cuda_backend/cudnn.rs | 0 .../src/core/cuda_backend/device.rs | 0 .../src/core/cuda_backend/error.rs | 0 .../src/core/cuda_backend/mod.rs | 0 .../src/core/cuda_backend/utils.rs | 0 .../src/core/custom_op.rs | 0 .../src/core/device.rs | 0 .../src/core/display.rs | 0 .../src/core/dtype.rs | 0 .../src/core/dummy_cuda_backend.rs | 0 .../src/core/dummy_metal_backend.rs | 0 .../src/core/error.rs | 0 .../src/core/indexer.rs | 16 +- .../src/core/layout.rs | 0 .../src/core/metal_backend/device.rs | 0 .../src/core/metal_backend/mod.rs | 0 .../src/core/mkl.rs | 0 .../src/core/mod.rs | 0 .../src/core/npy.rs | 0 .../src/core/op.rs | 0 .../src/core/pickle.rs | 0 .../src/core/quantized/avx.rs | 0 .../src/core/quantized/cuda.rs | 0 .../src/core/quantized/dummy_cuda.rs | 0 .../src/core/quantized/dummy_metal.rs | 0 .../src/core/quantized/ggml_file.rs | 0 .../src/core/quantized/gguf_file.rs | 0 .../src/core/quantized/imatrix_file.rs | 0 .../src/core/quantized/k_quants.rs | 0 .../src/core/quantized/metal.rs | 0 .../src/core/quantized/mod.rs | 0 .../src/core/quantized/neon.rs | 0 .../src/core/quantized/simd128.rs | 0 .../src/core/quantized/utils.rs | 0 .../src/core/safetensors.rs | 0 .../src/core/scalar.rs | 0 .../src/core/shape.rs | 0 .../src/core/sort.rs | 0 .../src/core/storage.rs | 0 .../src/core/streaming.rs | 0 .../src/core/strided_index.rs | 0 .../src/core/tensor.rs | 126 +++++++-------- .../src/core/tensor_cat.rs | 4 +- .../src/core/tensor_indexing.rs | 4 +- .../src/core/test_utils.rs | 0 .../src/core/tests/conv_tests.rs | 4 +- .../src/core/tests/custom_op_tests.rs | 26 ++-- .../src/core/tests/display_tests.rs | 2 +- 
.../src/core/tests/fortran_tensor_3d.pth | Bin .../src/core/tests/grad_tests.rs | 2 +- .../src/core/tests/indexing_tests.rs | 2 +- .../src/core/tests/layout_tests.rs | 0 .../src/core/tests/matmul_tests.rs | 2 +- .../src/core/tests/npy.py | 0 .../src/core/tests/pool_tests.rs | 2 +- .../src/core/tests/pth.py | 0 .../src/core/tests/pth_tests.rs | 6 +- .../src/core/tests/quantized_tests.rs | 2 +- .../src/core/tests/serialization_tests.rs | 12 +- .../src/core/tests/tensor_tests.rs | 4 +- .../src/core/tests/test.npy | Bin .../src/core/tests/test.npz | Bin .../src/core/tests/test.pt | Bin .../src/core/tests/test_with_key.pt | Bin .../src/core/utils.rs | 0 .../src/core/variable.rs | 0 .../src/cuda_kernels/LICENSE | 0 .../src/cuda_kernels/affine.cu | 0 .../src/cuda_kernels/binary.cu | 0 .../src/cuda_kernels/binary_op_macros.cuh | 0 .../src/cuda_kernels/cast.cu | 0 .../src/cuda_kernels/compatibility.cuh | 0 .../src/cuda_kernels/conv.cu | 0 .../src/cuda_kernels/cuda_utils.cuh | 0 .../src/cuda_kernels/fill.cu | 0 .../src/cuda_kernels/fused_rms_norm.cu | 0 .../src/cuda_kernels/fused_rope.cu | 0 .../src/cuda_kernels/indexing.cu | 0 .../src/cuda_kernels/kvconcat.cu | 0 .../src/cuda_kernels/mod.rs | 0 .../src/cuda_kernels/quantized.cu | 0 .../src/cuda_kernels/reduce.cu | 0 .../src/cuda_kernels/sort.cu | 0 .../src/cuda_kernels/ternary.cu | 0 .../src/cuda_kernels/unary.cu | 0 .../src/lib.rs | 0 .../src/metal_kernels/LICENSE | 0 .../src/metal_kernels/affine.metal | 0 .../src/metal_kernels/binary.metal | 0 .../src/metal_kernels/cast.metal | 0 .../src/metal_kernels/conv.metal | 0 .../src/metal_kernels/fill.metal | 0 .../src/metal_kernels/indexing.metal | 0 .../libMetalFlashAttention.metallib | Bin .../src/metal_kernels/mlx_gemm.metal | 0 .../src/metal_kernels/mod.rs | 0 .../src/metal_kernels/quantized.metal | 0 .../src/metal_kernels/random.metal | 0 .../src/metal_kernels/reduce.metal | 0 .../scaled_dot_product_attention.metal | 0 .../src/metal_kernels/sort.metal | 0 .../src/metal_kernels/ternary.metal | 0 .../src/metal_kernels/tests.rs | 0 .../src/metal_kernels/unary.metal | 0 .../src/metal_kernels/utils.rs | 0 .../src/model_source.rs | 2 +- .../src/nn/LICENSE | 0 .../src/nn/activation.rs | 0 .../src/nn/attention.rs | 0 .../src/nn/batch_norm.rs | 0 .../src/nn/conv.rs | 0 .../src/nn/embedding.rs | 0 .../src/nn/encoding.rs | 12 +- .../src/nn/func.rs | 0 .../src/nn/group_norm.rs | 0 .../src/nn/init.rs | 0 .../src/nn/kv_cache.rs | 0 .../src/nn/layer_norm.rs | 6 +- .../src/nn/linear.rs | 6 +- .../src/nn/loss.rs | 0 .../src/nn/mod.rs | 0 .../src/nn/ops.rs | 8 +- .../src/nn/optim.rs | 0 .../src/nn/rnn.rs | 0 .../src/nn/rope.rs | 0 .../src/nn/rotary_emb.rs | 0 .../src/nn/sequential.rs | 0 .../src/nn/tests/batch_norm.rs | 2 +- .../src/nn/tests/group_norm.rs | 2 +- .../src/nn/tests/kv_cache.rs | 4 +- .../src/nn/tests/layer_norm.rs | 2 +- .../src/nn/tests/loss.rs | 8 +- .../src/nn/tests/one_hot.rs | 2 +- .../src/nn/tests/ops.rs | 42 ++--- .../src/nn/tests/optim.rs | 6 +- .../src/nn/tests/rnn.rs | 10 +- .../src/nn/tests/sdpa.rs | 44 +++--- .../src/nn/var_builder.rs | 6 +- .../src/nn/var_map.rs | 0 .../src/nn_wrap.rs | 0 .../src/progress.rs | 0 .../src/safetensors.rs | 0 .../src/tokenizer.rs | 0 .../src/tokens.rs | 0 .../src/varbuilder.rs | 0 .../src/varbuilder_loading.rs | 0 .../Cargo.toml | 18 +-- .../src/lib.rs | 6 +- .../src/models/clip/mod.rs | 0 .../src/models/clip/text.rs | 67 ++++---- .../src/models/flux/mod.rs | 0 .../src/models/flux/model.rs | 89 ++++++----- .../src/models/mod.rs | 4 +- .../src/models/t5/mod.rs | 
16 +- .../src/models/vaes/autoencoder_kl.rs | 10 +- .../src/models/vaes/mod.rs | 4 +- .../src/models/vaes/vae.rs | 46 +++--- .../src/pipelines/flux/mod.rs | 18 +-- .../src/pipelines/flux/sampling.rs | 2 +- .../src/pipelines/mod.rs | 8 +- .../src/pipelines/sampling.rs | 2 +- .../src/pipelines/scheduler.rs | 2 +- .../Cargo.toml | 6 +- .../examples/dduf/README.md | 0 .../examples/dduf/main.rs | 4 +- .../examples/flux/README.md | 0 .../examples/flux/main.rs | 4 +- {diffuse_rs_py => diffusion_rs_py}/Cargo.toml | 16 +- {diffuse_rs_py => diffusion_rs_py}/build.rs | 0 .../diffuse_rs.pyi | 0 .../examples/dduf.py | 2 +- .../examples/flux.py | 2 +- diffusion_rs_py/generate_wheels.sh | 64 ++++++++ .../pyproject.toml | 4 +- {diffuse_rs_py => diffusion_rs_py}/src/lib.rs | 18 +-- 225 files changed, 749 insertions(+), 795 deletions(-) delete mode 100644 .github/workflows/analysis.yaml delete mode 100644 diffuse_rs_backend/README.md delete mode 100644 diffuse_rs_cli/README.md delete mode 100644 diffuse_rs_common/README.md delete mode 100644 diffuse_rs_py/generate_wheels.sh rename {diffuse_rs_backend => diffusion_rs_backend}/Cargo.toml (77%) create mode 100644 diffusion_rs_backend/README.md rename {diffuse_rs_backend => diffusion_rs_backend}/build.rs (100%) rename {diffuse_rs_backend => diffusion_rs_backend}/kernels/bitsandbytes/dequant.cu (100%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/bitsandbytes/ffi.rs (97%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/bitsandbytes/mod.rs (90%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/bitsandbytes/op.rs (82%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/cublaslt/api.rs (70%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/cublaslt/matmul.rs (98%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/cublaslt/mod.rs (91%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/gguf/mod.rs (93%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/lib.rs (94%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/bnb_dequantize.metal (100%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/mod.rs (99%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/sdpa.metal (100%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/metal_kernels/utils.rs (100%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/ops.rs (84%) rename {diffuse_rs_backend => diffusion_rs_backend}/src/unquantized/mod.rs (94%) rename {diffuse_rs_cli => diffusion_rs_cli}/Cargo.toml (57%) create mode 100644 diffusion_rs_cli/README.md rename {diffuse_rs_cli => diffusion_rs_cli}/src/main.rs (97%) rename {diffuse_rs_common => diffusion_rs_common}/Cargo.toml (98%) create mode 100644 diffusion_rs_common/README.md rename {diffuse_rs_common => diffusion_rs_common}/build.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/LICENSE (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/accelerate.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/backend.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/backprop.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/conv.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/convert.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/avx.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/erf.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/kernels.rs (100%) rename {diffuse_rs_common 
=> diffusion_rs_common}/src/core/cpu/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/neon.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu/simd128.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu_backend/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cpu_backend/utils.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/cudnn.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/device.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/error.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/cuda_backend/utils.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/custom_op.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/device.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/display.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/dtype.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/dummy_cuda_backend.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/dummy_metal_backend.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/error.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/indexer.rs (93%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/layout.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/metal_backend/device.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/metal_backend/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/mkl.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/npy.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/op.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/pickle.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/avx.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/cuda.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/dummy_cuda.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/dummy_metal.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/ggml_file.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/gguf_file.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/imatrix_file.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/k_quants.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/metal.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/neon.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/simd128.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/quantized/utils.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/safetensors.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/scalar.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/shape.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/sort.rs (100%) rename {diffuse_rs_common => 
diffusion_rs_common}/src/core/storage.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/streaming.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/strided_index.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tensor.rs (96%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tensor_cat.rs (98%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tensor_indexing.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/test_utils.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/conv_tests.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/custom_op_tests.rs (83%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/display_tests.rs (97%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/fortran_tensor_3d.pth (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/grad_tests.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/indexing_tests.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/layout_tests.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/matmul_tests.rs (98%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/npy.py (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/pool_tests.rs (97%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/pth.py (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/pth_tests.rs (66%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/quantized_tests.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/serialization_tests.rs (76%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/tensor_tests.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test.npy (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test.npz (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test.pt (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/tests/test_with_key.pt (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/utils.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/core/variable.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/LICENSE (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/affine.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/binary.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/binary_op_macros.cuh (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/cast.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/compatibility.cuh (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/conv.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/cuda_utils.cuh (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/fill.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/fused_rms_norm.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/fused_rope.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/indexing.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/kvconcat.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/mod.rs (100%) rename {diffuse_rs_common => 
diffusion_rs_common}/src/cuda_kernels/quantized.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/reduce.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/sort.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/ternary.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/cuda_kernels/unary.cu (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/lib.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/LICENSE (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/affine.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/binary.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/cast.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/conv.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/fill.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/indexing.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/libMetalFlashAttention.metallib (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/mlx_gemm.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/quantized.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/random.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/reduce.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/scaled_dot_product_attention.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/sort.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/ternary.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/tests.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/unary.metal (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/metal_kernels/utils.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/model_source.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/LICENSE (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/activation.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/attention.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/batch_norm.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/conv.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/embedding.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/encoding.rs (93%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/func.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/group_norm.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/init.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/kv_cache.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/layer_norm.rs (98%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/linear.rs (94%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/loss.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/mod.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/ops.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/optim.rs (100%) rename {diffuse_rs_common => 
diffusion_rs_common}/src/nn/rnn.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/rope.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/rotary_emb.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/sequential.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/batch_norm.rs (98%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/group_norm.rs (98%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/kv_cache.rs (96%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/layer_norm.rs (97%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/loss.rs (87%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/one_hot.rs (98%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/ops.rs (84%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/optim.rs (96%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/rnn.rs (91%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/tests/sdpa.rs (83%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/var_builder.rs (99%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn/var_map.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/nn_wrap.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/progress.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/safetensors.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/tokenizer.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/tokens.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/varbuilder.rs (100%) rename {diffuse_rs_common => diffusion_rs_common}/src/varbuilder_loading.rs (100%) rename {diffuse_rs_core => diffusion_rs_core}/Cargo.toml (59%) rename {diffuse_rs_core => diffusion_rs_core}/src/lib.rs (81%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/clip/mod.rs (100%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/clip/text.rs (78%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/flux/mod.rs (100%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/flux/model.rs (90%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/mod.rs (93%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/t5/mod.rs (97%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/vaes/autoencoder_kl.rs (93%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/vaes/mod.rs (94%) rename {diffuse_rs_core => diffusion_rs_core}/src/models/vaes/vae.rs (88%) rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/flux/mod.rs (93%) rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/flux/sampling.rs (97%) rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/mod.rs (97%) rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/sampling.rs (98%) rename {diffuse_rs_core => diffusion_rs_core}/src/pipelines/scheduler.rs (96%) rename {diffuse_rs_examples => diffusion_rs_examples}/Cargo.toml (74%) rename {diffuse_rs_examples => diffusion_rs_examples}/examples/dduf/README.md (100%) rename {diffuse_rs_examples => diffusion_rs_examples}/examples/dduf/main.rs (92%) rename {diffuse_rs_examples => diffusion_rs_examples}/examples/flux/README.md (100%) rename {diffuse_rs_examples => diffusion_rs_examples}/examples/flux/main.rs (94%) rename {diffuse_rs_py => diffusion_rs_py}/Cargo.toml (61%) rename {diffuse_rs_py => diffusion_rs_py}/build.rs (100%) rename {diffuse_rs_py => diffusion_rs_py}/diffuse_rs.pyi (100%) rename {diffuse_rs_py => 
diffusion_rs_py}/examples/dduf.py (82%) rename {diffuse_rs_py => diffusion_rs_py}/examples/flux.py (83%) create mode 100644 diffusion_rs_py/generate_wheels.sh rename {diffuse_rs_py => diffusion_rs_py}/pyproject.toml (92%) rename {diffuse_rs_py => diffusion_rs_py}/src/lib.rs (83%) diff --git a/.github/workflows/analysis.yaml b/.github/workflows/analysis.yaml deleted file mode 100644 index 52a5fe0..0000000 --- a/.github/workflows/analysis.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: Analysis -on: - pull_request_target - -jobs: - comment: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust and Cargo - run: | - curl -sSf https://sh.rustup.rs | sh -s -- -y - source $HOME/.cargo/env - - - name: Install Tokei - run: cargo install tokei - - - name: Run Tokei and get the lines of code - run: tokei . > tokei_output.txt - - - name: Comment or Update PR - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const tokeiOutput = fs.readFileSync('tokei_output.txt', 'utf8'); - const uniqueIdentifier = 'Code Metrics Report'; - const codeReport = ` -
-          <details>
-            <summary>${uniqueIdentifier}</summary>
-            <pre>
-              ${tokeiOutput}
-            </pre>
-          </details>
- `; - - const issue_number = context.issue.number; - const { owner, repo } = context.repo; - - const comments = await github.rest.issues.listComments({ - issue_number, - owner, - repo - }); - - const existingComment = comments.data.find(comment => comment.body.includes(uniqueIdentifier)); - - if (existingComment) { - await github.rest.issues.updateComment({ - owner, - repo, - comment_id: existingComment.id, - body: codeReport - }); - } else { - await github.rest.issues.createComment({ - issue_number, - owner, - repo, - body: codeReport - }); - } diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index f8c776d..71b0971 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -38,17 +38,17 @@ jobs: - name: Build docs run: | rm -rf ./docs - echo "" > target/doc/index.html + echo "" > target/doc/index.html cp -r target/doc ./docs - name: Build Python docs run: | python3 -m venv myenv source myenv/bin/activate pip install maturin[patchelf] pdoc - cd diffuse_rs_py + cd diffusion_rs_py maturin develop cd .. - pdoc diffuse_rs -o ./docs/pyo3 + pdoc diffusion_rs -o ./docs/pyo3 - name: Deploy uses: JamesIves/github-pages-deploy-action@v4 with: diff --git a/.typos.toml b/.typos.toml index d5c733c..c561ee3 100644 --- a/.typos.toml +++ b/.typos.toml @@ -9,8 +9,8 @@ extend-ignore-identifiers-re = [ [files] extend-exclude = [ - "diffuse_rs_common/src/core/*", - "diffuse_rs_common/src/nn/*", - "diffuse_rs_common/src/cuda_kernels/*", - "diffuse_rs_common/src/metal_kernels/*" + "diffusion_rs_common/src/core/*", + "diffusion_rs_common/src/nn/*", + "diffusion_rs_common/src/cuda_kernels/*", + "diffusion_rs_common/src/metal_kernels/*" ] \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 9964528..cc3037f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -663,12 +663,12 @@ dependencies = [ ] [[package]] -name = "diffuse_rs_backend" -version = "0.1.3" +name = "diffusion_rs_backend" +version = "0.1.0" dependencies = [ "bindgen_cuda", "byteorder", - "diffuse_rs_common", + "diffusion_rs_common", "float8", "half", "lazy_static", @@ -682,20 +682,20 @@ dependencies = [ ] [[package]] -name = "diffuse_rs_cli" -version = "0.1.3" +name = "diffusion_rs_cli" +version = "0.1.0" dependencies = [ "anyhow", "clap", "cliclack", - "diffuse_rs_core", + "diffusion_rs_core", "tracing", "tracing-subscriber", ] [[package]] -name = "diffuse_rs_common" -version = "0.1.3" +name = "diffusion_rs_common" +version = "0.1.0" dependencies = [ "accelerate-src", "anyhow", @@ -731,13 +731,13 @@ dependencies = [ ] [[package]] -name = "diffuse_rs_core" -version = "0.1.3" +name = "diffusion_rs_core" +version = "0.1.0" dependencies = [ "anyhow", "clap", - "diffuse_rs_backend", - "diffuse_rs_common", + "diffusion_rs_backend", + "diffusion_rs_common", "float8", "half", "hf-hub", @@ -754,22 +754,22 @@ dependencies = [ ] [[package]] -name = "diffuse_rs_examples" -version = "0.1.3" +name = "diffusion_rs_examples" +version = "0.1.0" dependencies = [ "anyhow", "clap", - "diffuse_rs_core", + "diffusion_rs_core", "tracing", "tracing-subscriber", ] [[package]] -name = "diffuse_rs_py" -version = "0.1.3" +name = "diffusion_rs_py" +version = "0.1.0" dependencies = [ "anyhow", - "diffuse_rs_core", + "diffusion_rs_core", "image", "pyo3", "pyo3-build-config", diff --git a/Cargo.toml b/Cargo.toml index b583c2d..068c60f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,21 +1,21 @@ [workspace] members = [ - "diffuse_rs_core", - "diffuse_rs_examples", - "diffuse_rs_backend", - "diffuse_rs_common", - "diffuse_rs_cli", - 
"diffuse_rs_py", + "diffusion_rs_core", + "diffusion_rs_examples", + "diffusion_rs_backend", + "diffusion_rs_common", + "diffusion_rs_cli", + "diffusion_rs_py", ] resolver = "2" [workspace.package] -version = "0.1.3" +version = "0.1.0" edition = "2021" authors = ["Eric Buehler"] description = "Blazingly fast inference of diffusion models." -homepage = "https://github.com/EricLBuehler/diffuse-rs" -repository = "https://github.com/EricLBuehler/diffuse-rs" +homepage = "https://github.com/EricLBuehler/diffusion-rs" +repository = "https://github.com/EricLBuehler/diffusion-rs" keywords = ["machine-learning"] categories = ["science"] license = "MIT" diff --git a/FEATURE_FLAGS.md b/FEATURE_FLAGS.md index 4dc36db..d507557 100644 --- a/FEATURE_FLAGS.md +++ b/FEATURE_FLAGS.md @@ -1,6 +1,6 @@ # Feature flags -Diffuse-rs controls building with GPU support or CPU SIMD acceleration with feature flags. +diffusion-rs controls building with GPU support or CPU SIMD acceleration with feature flags. These are set at compile time and are as follows: diff --git a/INSTALL.md b/INSTALL.md index e791d6a..8b161cf 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,4 +1,4 @@ -# Installation guide for diffuse-rs +# Installation guide for diffusion-rs **ToC** - [CLI](#cli) @@ -8,7 +8,7 @@ - [Rust crate](#rust-crate) ## CLI -1) Installing diffuse-rs via the CLI requires a few prerequisites: +1) Installing diffusion-rs via the CLI requires a few prerequisites: - Install the Rust programming language - Follow the instructions on this site: https://rustup.rs/ - (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`) @@ -18,13 +18,13 @@ - Install the necessary tool: `pip install huggingface_hub` - Login: `huggingface_cli login` -3) Install the `diffuse_rs_cli` CLI +3) Install the `diffusion_rs_cli` CLI > [!NOTE] > Replace the `...` below with [feature flags](FEATURE_FLAGS.md) to build for Nvidia GPUs (CUDA) or Apple Silicon GPUs (Metal) ``` -cargo install diffuse_rs_cli --features ... +cargo install diffusion_rs_cli --features ... ``` 4) Try the CLI! @@ -32,11 +32,11 @@ cargo install diffuse_rs_cli --features ... > Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf` ``` -diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf +diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf ``` ## CLI from source -1) Installing diffuse-rs via the CLI requires a few prerequisites: +1) Installing diffusion-rs via the CLI requires a few prerequisites: - Install the Rust programming language - Follow the instructions on this site: https://rustup.rs/ - (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`) @@ -48,17 +48,17 @@ diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf 3) Clone the repository ``` -git clone https://github.com/EricLBuehler/diffuse-rs.git -cd diffuse-rs +git clone https://github.com/EricLBuehler/diffusion-rs.git +cd diffusion-rs ``` -4) Install the `diffuse_rs_cli` CLI +4) Install the `diffusion_rs_cli` CLI > [!NOTE] > Replace the `...` below with [feature flags](FEATURE_FLAGS.md) to build for Nvidia GPUs (CUDA) or Apple Silicon GPUs (Metal) ``` -cargo install --path diffuse_rs_cli --release --features ... +cargo install --path diffusion_rs_cli --release --features ... ``` 5) Try the CLI! @@ -66,11 +66,11 @@ cargo install --path diffuse_rs_cli --release --features ... 
> Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf` ``` -diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf +diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf ``` ## Python bindings -1) Installing diffuse-rs via the Python bindings requires a few prerequisites: +1) Installing diffusion-rs via the Python bindings requires a few prerequisites: - Install the Rust programming language - Follow the instructions on this site: https://rustup.rs/ - (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`) @@ -84,18 +84,18 @@ diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf |Feature|Flag| |--|--| -|Nvidia GPUs (CUDA)|`pip install diffuse_rs_cuda`| -|Apple Silicon GPUs (Metal)|`pip install diffuse_rs_metal`| -|Apple Accelerate (CPU)|`pip install diffuse_rs_accelerate`| -|Intel MKL (CPU)|`pip install diffuse_rs_mkl`| -|Use AVX or NEON automatically|`pip install diffuse_rs`| +|Nvidia GPUs (CUDA)|`pip install diffusion_rs_cuda`| +|Apple Silicon GPUs (Metal)|`pip install diffusion_rs_metal`| +|Apple Accelerate (CPU)|`pip install diffusion_rs_accelerate`| +|Intel MKL (CPU)|`pip install diffusion_rs_mkl`| +|Use AVX or NEON automatically|`pip install diffusion_rs`| 4) Try the Python bindings! > Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf` ```py -from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline +from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline from PIL import Image import io @@ -113,7 +113,7 @@ image.show() ``` ## Python bindings from source -1) Installing diffuse-rs via the Python bindings requires a few prerequisites: +1) Installing diffusion-rs via the Python bindings requires a few prerequisites: - Install the Rust programming language - Follow the instructions on this site: https://rustup.rs/ - (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`) @@ -125,8 +125,8 @@ image.show() 3) Clone the repository ``` -git clone https://github.com/EricLBuehler/diffuse-rs.git -cd diffuse-rs +git clone https://github.com/EricLBuehler/diffusion-rs.git +cd diffusion-rs ``` 4) Install the maturin build tool @@ -140,7 +140,7 @@ pip install maturin > Replace the `...` below with [feature flags](FEATURE_FLAGS.md) to build for Nvidia GPUs (CUDA) or Apple Silicon GPUs (Metal) ``` -maturin develop -m diffuse_rs_py/Cargo.toml --release --features ... +maturin develop -m diffusion_rs_py/Cargo.toml --release --features ... ``` 6) Try the Python bindings! @@ -148,7 +148,7 @@ maturin develop -m diffuse_rs_py/Cargo.toml --release --features ... 
> Download the DDUF file here: `wget https://huggingface.co/DDUF/FLUX.1-dev-DDUF/resolve/main/FLUX.1-dev-Q4-bnb.dduf` ```py -from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline +from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline from PIL import Image import io @@ -166,7 +166,7 @@ image.show() ``` ## Rust crate -1) Installing diffuse-rs for usage as a Rust crate requires a few prerequisites: +1) Installing diffusion-rs for usage as a Rust crate requires a few prerequisites: - Install the Rust programming language - Follow the instructions on this site: https://rustup.rs/ - (*Linux/Mac only*) Install OpenSSL (*Ubuntu:* `sudo apt install libssl-dev`, *Brew:* `brew install openssl`) @@ -177,5 +177,5 @@ image.show() - Login: `huggingface_cli login` 3) Add the dependency to your `Cargo.toml` - - Run: `cargo add diffuse_rs_core` - - Alternatively, you can add the git dependency to your Cargo.toml for the latest updates: `diffuse_rs_core = { git = "https://github.com/EricLBuehler/diffuse-rs.git", version = "0.1.0" }` + - Run: `cargo add diffusion_rs_core` + - Alternatively, you can add the git dependency to your Cargo.toml for the latest updates: `diffusion_rs_core = { git = "https://github.com/EricLBuehler/diffusion-rs.git", version = "0.1.0" }` diff --git a/README.md b/README.md index c4c2f18..835a259 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

- diffuse-rs
+ diffusion-rs

@@ -8,7 +8,7 @@ Blazingly fast inference of diffusion models.

-| Rust Documentation | Python Documentation | Discord |
+| Rust Documentation | Python Documentation | Discord |

@@ -22,7 +22,7 @@ Blazingly fast inference of diffusion models. - AVX support for x86 CPUs - Allow acceleration of models larger than the total VRAM size with offloading -Please do not hesitate to contact us with feature requests via [Github issues](https://github.com/EricLBuehler/diffuse-rs/issues)! +Please do not hesitate to contact us with feature requests via [Github issues](https://github.com/EricLBuehler/diffusion-rs/issues)! ## Upcoming features - 🚧 LoRA support @@ -38,17 +38,17 @@ After [installing](#installation), you can try out these examples! **CLI:** ```bash -diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf +diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf ``` -More CLI examples [here](diffuse_rs_cli/README.md). +More CLI examples [here](diffusion_rs_cli/README.md). **Python:** -More Python examples [here](diffuse_rs_py/examples). +More Python examples [here](diffusion_rs_py/examples). ```py -from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline +from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline from PIL import Image import io @@ -67,12 +67,12 @@ image.show() **Rust crate:** -Examples with the Rust crate: [here](diffuse_rs_examples/examples). +Examples with the Rust crate: [here](diffusion_rs_examples/examples). ```rust use std::time::Instant; -use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; +use diffusion_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; use tracing::level_filters::LevelFilter; use tracing_subscriber::EnvFilter; @@ -115,5 +115,5 @@ images[0].save("image.png")?; ## Contributing - Anyone is welcome to contribute by opening PRs - - See [good first issues](https://github.com/EricLBuehler/diffuse-rs/labels/good%20first%20issue) for a starting point! + - See [good first issues](https://github.com/EricLBuehler/diffusion-rs/labels/good%20first%20issue) for a starting point! - Collaborators will be invited based on past contributions diff --git a/diffuse_rs_backend/README.md b/diffuse_rs_backend/README.md deleted file mode 100644 index 97083ae..0000000 --- a/diffuse_rs_backend/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# `diffuse_rs_backend` - -Backend for quantization in diffuse-rs. \ No newline at end of file diff --git a/diffuse_rs_cli/README.md b/diffuse_rs_cli/README.md deleted file mode 100644 index 2f5a1c8..0000000 --- a/diffuse_rs_cli/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# `diffuse_rs_cli` - -CLI for diffuse-rs. - -## Examples -- FLUX dev: -``` -diffuse_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf -``` - -``` -diffuse_rs_cli --scale 3.5 --num-steps 50 model-id -m black-forest-labs/FLUX.1-dev -``` - -- FLUX schnell: -``` -diffuse_rs_cli --scale 0.0 --num-steps 4 dduf -f FLUX.1-schnell-Q8-bnb.dduf -``` - -``` -diffuse_rs_cli --scale 0.0 --num-steps 4 model-id -m black-forest-labs/FLUX.1-dev -``` \ No newline at end of file diff --git a/diffuse_rs_common/README.md b/diffuse_rs_common/README.md deleted file mode 100644 index 2cb940d..0000000 --- a/diffuse_rs_common/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# `diffuse_rs_common` - -Common functionality for diffuse-rs, including core ML framework based on Candle, NN functionality, and DDUF loading. 
\ No newline at end of file diff --git a/diffuse_rs_py/generate_wheels.sh b/diffuse_rs_py/generate_wheels.sh deleted file mode 100644 index 4ee229a..0000000 --- a/diffuse_rs_py/generate_wheels.sh +++ /dev/null @@ -1,64 +0,0 @@ -################################### -### UPLOADING -################################### - -# ⚠️⚠️⚠️⚠️ Be sure to update the `project.name` field in `pyproject.toml`!! ⚠️⚠️⚠️⚠️ -# diffuse_rs, diffuse_rs_cuda, diffuse_rs_metal, diffuse_rs_mkl, diffuse_rs_accelerate - -## testpypi: -# twine upload --repository pypi --password PASSWORD --username __token__ wheels-NAME/*.whl - - -## pypi: -# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cuda/*.whl -# twine upload --repository pypi --password PASSWORD --username __token__ wheels-mkl/*.whl -# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cuda/*.whl -# twine upload --repository pypi --password PASSWORD --username __token__ wheels-metal/*.whl -# ⚠️ Need both x86_64 and aarch64 builds before this! ⚠️ -# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cpu/*.whl - - -################################### -#### MAC: Aarch64 Manylinux and OSX -################################### - -docker build -t wheelmaker:latest -f Dockerfile.manylinux . -docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.10 -docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.11 -docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.12 - -maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.10 -maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.11 -maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.12 - -# Metal - -maturin build -o wheels-metal -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features metal -maturin build -o wheels-metal -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features metal -maturin build -o wheels-metal -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features metal - -# Accelerate - -maturin build -o wheels-accelerate -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features accelerate -maturin build -o wheels-accelerate -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features accelerate -maturin build -o wheels-accelerate -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features accelerate - -#################################### -# WINDOWS: x86_64 Manylinux, Windows -#################################### - -maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.10 -maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.11 -maturin build -o wheels-cpu -m diffuse_rs_py/Cargo.toml --interpreter python3.12 - -# CUDA - -maturin build -o wheels-cuda -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features cuda -maturin build -o wheels-cuda -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features cuda -maturin build -o wheels-cuda -m diffuse_rs_py/Cargo.toml --interpreter python3.12 --features cuda - -# MKL - -maturin build -o wheels-mkl -m diffuse_rs_py/Cargo.toml --interpreter python3.10 --features mkl -maturin build -o wheels-mkl -m diffuse_rs_py/Cargo.toml --interpreter python3.11 --features mkl -maturin build -o wheels-mkl -m diffuse_rs_py/Cargo.toml --interpreter python3.12 
--features mkl diff --git a/diffuse_rs_backend/Cargo.toml b/diffusion_rs_backend/Cargo.toml similarity index 77% rename from diffuse_rs_backend/Cargo.toml rename to diffusion_rs_backend/Cargo.toml index 9a125ad..23f0f1e 100644 --- a/diffuse_rs_backend/Cargo.toml +++ b/diffusion_rs_backend/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "diffuse_rs_backend" +name = "diffusion_rs_backend" readme = "README.md" authors.workspace = true version.workspace = true @@ -23,11 +23,11 @@ thiserror.workspace = true lazy_static.workspace = true paste.workspace = true byteorder.workspace = true -diffuse_rs_common = { path = "../diffuse_rs_common" } +diffusion_rs_common = { path = "../diffusion_rs_common" } [features] -cuda = ["dep:bindgen_cuda", "diffuse_rs_common/cuda"] -metal = ["dep:metal", "diffuse_rs_common/metal"] +cuda = ["dep:bindgen_cuda", "diffusion_rs_common/cuda"] +metal = ["dep:metal", "diffusion_rs_common/metal"] [build-dependencies] bindgen_cuda = { version = "0.1.5", optional = true } diff --git a/diffusion_rs_backend/README.md b/diffusion_rs_backend/README.md new file mode 100644 index 0000000..4b8d3dd --- /dev/null +++ b/diffusion_rs_backend/README.md @@ -0,0 +1,3 @@ +# `diffusion_rs_backend` + +Backend for quantization in diffusion-rs. \ No newline at end of file diff --git a/diffuse_rs_backend/build.rs b/diffusion_rs_backend/build.rs similarity index 100% rename from diffuse_rs_backend/build.rs rename to diffusion_rs_backend/build.rs diff --git a/diffuse_rs_backend/kernels/bitsandbytes/dequant.cu b/diffusion_rs_backend/kernels/bitsandbytes/dequant.cu similarity index 100% rename from diffuse_rs_backend/kernels/bitsandbytes/dequant.cu rename to diffusion_rs_backend/kernels/bitsandbytes/dequant.cu diff --git a/diffuse_rs_backend/src/bitsandbytes/ffi.rs b/diffusion_rs_backend/src/bitsandbytes/ffi.rs similarity index 97% rename from diffuse_rs_backend/src/bitsandbytes/ffi.rs rename to diffusion_rs_backend/src/bitsandbytes/ffi.rs index d7f393d..df6da23 100644 --- a/diffuse_rs_backend/src/bitsandbytes/ffi.rs +++ b/diffusion_rs_backend/src/bitsandbytes/ffi.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::cuda::cudarc::driver::sys::CUstream; +use diffusion_rs_common::core::cuda::cudarc::driver::sys::CUstream; use half::{bf16, f16}; #[allow(dead_code)] diff --git a/diffuse_rs_backend/src/bitsandbytes/mod.rs b/diffusion_rs_backend/src/bitsandbytes/mod.rs similarity index 90% rename from diffuse_rs_backend/src/bitsandbytes/mod.rs rename to diffusion_rs_backend/src/bitsandbytes/mod.rs index e149b1e..02ee1e3 100644 --- a/diffuse_rs_backend/src/bitsandbytes/mod.rs +++ b/diffusion_rs_backend/src/bitsandbytes/mod.rs @@ -1,7 +1,7 @@ use std::sync::Arc; -use diffuse_rs_common::core::{DType, Device, Result, Shape, Tensor}; -use diffuse_rs_common::VarBuilder; +use diffusion_rs_common::core::{DType, Device, Result, Shape, Tensor}; +use diffusion_rs_common::VarBuilder; use serde::Deserialize; use crate::{QuantMethod, QuantMethodConfig}; @@ -117,7 +117,7 @@ impl BnbLinear { { Self::linear_4bit(in_dim, out_dim, bias, vb) } else { - diffuse_rs_common::bail!("`BnbLinear` expects fp4/nf4 or int8 layers."); + diffusion_rs_common::bail!("`BnbLinear` expects fp4/nf4 or int8 layers."); } } @@ -142,7 +142,7 @@ impl BnbLinear { if !vb_w.contains_tensor("quant_state.bitsandbytes__nf4") && !vb_w.contains_tensor("quant_state.bitsandbytes__fp4") { - diffuse_rs_common::bail!("`BnbLinear` expects either `...__nf4` or `...__fp4` tensors, this means the layer is not 4bit or 8big."); + diffusion_rs_common::bail!("`BnbLinear` 
expects either `...__nf4` or `...__fp4` tensors, this means the layer is not 4bit or 8big."); } let quant_ty = if vb_w.contains_tensor("quant_state.bitsandbytes__nf4") { @@ -163,29 +163,27 @@ impl BnbLinear { BnbQuantType::Int8 => None, }; let Some(state) = state else { - diffuse_rs_common::bail!("Only fp8/nf4 quantization is supported for now.") + diffusion_rs_common::bail!("Only fp8/nf4 quantization is supported for now.") }; let state_str = String::from_utf8(state.to_vec1::()?)?; let state: BnbQuantState = - serde_json::from_str(&state_str).map_err(diffuse_rs_common::core::Error::msg)?; + serde_json::from_str(&state_str).map_err(diffusion_rs_common::core::Error::msg)?; let nested = if vb_w.contains_tensor("nested_absmax") { // TODO: can `nested_blocksize` be None, default to 64 like bnb? Some(Arc::new(BnbQuantParmas { absmax: vb_w.get_unchecked_dtype("nested_absmax", DType::F32)?, code: vb_w.get_unchecked_dtype("nested_quant_map", DType::F32)?, - blocksize: state - .nested_blocksize - .ok_or(diffuse_rs_common::core::Error::debug( - "`nested_blocksize` must be present.", - ))?, + blocksize: state.nested_blocksize.ok_or( + diffusion_rs_common::core::Error::debug("`nested_blocksize` must be present."), + )?, shape: None, nested: None, offset: None, // Put it in the outer one! dtype: state .nested_dtype - .ok_or(diffuse_rs_common::core::Error::debug( + .ok_or(diffusion_rs_common::core::Error::debug( "`nested_dtype` must be present.", ))?, })) @@ -233,16 +231,18 @@ impl BnbLinear { if let Some(nested) = ¶ms.nested { absmax = Self::dequantize_4bit(¶ms.absmax, nested, BnbQuantType::Int8)?; absmax = (absmax - + params.offset.ok_or(diffuse_rs_common::core::Error::debug( - "`offset` must be present.", - ))?)?; + + params + .offset + .ok_or(diffusion_rs_common::core::Error::debug( + "`offset` must be present.", + ))?)?; } let out_shape = params.shape.clone().unwrap_or(input.shape().clone()); let out_dtype: DType = params.dtype.into(); if !SUPPORTED_BLOCKSIZE.contains(¶ms.blocksize) { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "Blocksize of {} is not supported, {SUPPORTED_BLOCKSIZE:?} are.", params.blocksize ); @@ -262,7 +262,7 @@ impl BnbLinear { } impl QuantMethod for BnbLinear { - fn new(method: QuantMethodConfig) -> diffuse_rs_common::core::Result + fn new(method: QuantMethodConfig) -> diffusion_rs_common::core::Result where Self: Sized, { diff --git a/diffuse_rs_backend/src/bitsandbytes/op.rs b/diffusion_rs_backend/src/bitsandbytes/op.rs similarity index 82% rename from diffuse_rs_backend/src/bitsandbytes/op.rs rename to diffusion_rs_backend/src/bitsandbytes/op.rs index 2fda5a9..426793f 100644 --- a/diffuse_rs_backend/src/bitsandbytes/op.rs +++ b/diffusion_rs_backend/src/bitsandbytes/op.rs @@ -3,12 +3,12 @@ use std::fmt::Debug; #[cfg(feature = "cuda")] -use diffuse_rs_common::core::cuda::{ +use diffusion_rs_common::core::cuda::{ cudarc::driver::{sys::CUstream, CudaSlice, CudaView, DeviceRepr, ValidAsZeroBits}, CudaDevice, }; -use diffuse_rs_common::core::{ +use diffusion_rs_common::core::{ backend::BackendStorage, CpuStorage, CustomOp2, CustomOp3, DType, Result, Shape, Tensor, WithDType, }; @@ -209,7 +209,7 @@ impl DequantizeOp { dev: &CudaDevice, kernel: unsafe extern "C" fn(*const f32, *const u8, *const f32, *mut T, i32, i32, CUstream), ) -> Result> { - use diffuse_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr}; + use diffusion_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr}; let out = unsafe { dev.alloc::(self.shape.elem_count()).w()? 
}; unsafe { @@ -236,14 +236,14 @@ impl CustomOp3 for DequantizeOp { fn cpu_fwd( &self, input_s: &CpuStorage, - input_l: &diffuse_rs_common::core::Layout, + input_l: &diffusion_rs_common::core::Layout, absmax_s: &CpuStorage, - absmax_l: &diffuse_rs_common::core::Layout, + absmax_l: &diffusion_rs_common::core::Layout, code_s: &CpuStorage, - code_l: &diffuse_rs_common::core::Layout, - ) -> diffuse_rs_common::core::Result<(CpuStorage, diffuse_rs_common::core::Shape)> { + code_l: &diffusion_rs_common::core::Layout, + ) -> diffusion_rs_common::core::Result<(CpuStorage, diffusion_rs_common::core::Shape)> { if !(input_l.is_contiguous() && absmax_l.is_contiguous() && code_l.is_contiguous()) { - diffuse_rs_common::bail!("All inputs must be contiguous"); + diffusion_rs_common::bail!("All inputs must be contiguous"); } match (input_s, absmax_s, code_s, self.out_ty) { ( @@ -273,7 +273,7 @@ impl CustomOp3 for DequantizeOp { CpuStorage::F32(self.dequantize_cpu(input, absmax, code, self.quant_ty)), self.shape.clone(), )), - (i, a, c, t) => diffuse_rs_common::bail!( + (i, a, c, t) => diffusion_rs_common::bail!( "Unsupported dtypes for cpu dequant: {:?} input, {:?} absmax, {:?} code, {:?} out", i.dtype(), a.dtype(), @@ -286,15 +286,15 @@ impl CustomOp3 for DequantizeOp { #[cfg(feature = "cuda")] fn cuda_fwd( &self, - input_s: &diffuse_rs_common::core::CudaStorage, - input_l: &diffuse_rs_common::core::Layout, - absmax_s: &diffuse_rs_common::core::CudaStorage, - absmax_l: &diffuse_rs_common::core::Layout, - code_s: &diffuse_rs_common::core::CudaStorage, - code_l: &diffuse_rs_common::core::Layout, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + input_s: &diffusion_rs_common::core::CudaStorage, + input_l: &diffusion_rs_common::core::Layout, + absmax_s: &diffusion_rs_common::core::CudaStorage, + absmax_l: &diffusion_rs_common::core::Layout, + code_s: &diffusion_rs_common::core::CudaStorage, + code_l: &diffusion_rs_common::core::Layout, + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { if !(input_l.is_contiguous() && absmax_l.is_contiguous() && code_l.is_contiguous()) { - diffuse_rs_common::bail!("All inputs must be contiguous"); + diffusion_rs_common::bail!("All inputs must be contiguous"); } let input_slice = input_s .as_cuda_slice::()? 
@@ -308,7 +308,7 @@ impl CustomOp3 for DequantizeOp { let dev = input_s.device().clone(); let out = match (self.out_ty, self.quant_ty) { (BnbDType::F32, BnbQuantType::Nf4) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -320,7 +320,7 @@ impl CustomOp3 for DequantizeOp { ) } (BnbDType::F16, BnbQuantType::Nf4) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -332,7 +332,7 @@ impl CustomOp3 for DequantizeOp { ) } (BnbDType::BF16, BnbQuantType::Nf4) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -345,7 +345,7 @@ impl CustomOp3 for DequantizeOp { } (BnbDType::F32, BnbQuantType::Fp4) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -357,7 +357,7 @@ impl CustomOp3 for DequantizeOp { ) } (BnbDType::F16, BnbQuantType::Fp4) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -369,7 +369,7 @@ impl CustomOp3 for DequantizeOp { ) } (BnbDType::BF16, BnbQuantType::Fp4) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -382,7 +382,7 @@ impl CustomOp3 for DequantizeOp { } (BnbDType::F32, BnbQuantType::Int8) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -394,7 +394,7 @@ impl CustomOp3 for DequantizeOp { ) } (BnbDType::F16, BnbQuantType::Int8) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -406,7 +406,7 @@ impl CustomOp3 for DequantizeOp { ) } (BnbDType::BF16, BnbQuantType::Int8) => { - diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel::( input_slice, code_slice, @@ -425,17 +425,17 @@ impl CustomOp3 for DequantizeOp { #[cfg(feature = "metal")] fn metal_fwd( &self, - input_s: &diffuse_rs_common::core::MetalStorage, - input_l: &diffuse_rs_common::core::Layout, - absmax_s: &diffuse_rs_common::core::MetalStorage, - absmax_l: &diffuse_rs_common::core::Layout, - code_s: &diffuse_rs_common::core::MetalStorage, - code_l: &diffuse_rs_common::core::Layout, - ) -> Result<(diffuse_rs_common::core::MetalStorage, Shape)> { - use diffuse_rs_common::core::DType; + input_s: &diffusion_rs_common::core::MetalStorage, + input_l: &diffusion_rs_common::core::Layout, + absmax_s: &diffusion_rs_common::core::MetalStorage, + absmax_l: &diffusion_rs_common::core::Layout, + code_s: &diffusion_rs_common::core::MetalStorage, + code_l: &diffusion_rs_common::core::Layout, + ) -> Result<(diffusion_rs_common::core::MetalStorage, Shape)> { + use diffusion_rs_common::core::DType; if !(input_l.is_contiguous() && absmax_l.is_contiguous() && code_l.is_contiguous()) { - diffuse_rs_common::bail!("All inputs must be contiguous"); + 
diffusion_rs_common::bail!("All inputs must be contiguous"); } let command_buffer = input_s.device().command_buffer()?; @@ -450,13 +450,13 @@ impl CustomOp3 for DequantizeOp { )?; if input_s.dtype() != DType::U8 { - diffuse_rs_common::bail!("input must be u8"); + diffusion_rs_common::bail!("input must be u8"); } if code_s.dtype() != DType::F32 { - diffuse_rs_common::bail!("code must be f32"); + diffusion_rs_common::bail!("code must be f32"); } if absmax_s.dtype() != DType::F32 { - diffuse_rs_common::bail!("absmax must be f32"); + diffusion_rs_common::bail!("absmax must be f32"); } match self.quant_ty { @@ -475,7 +475,7 @@ impl CustomOp3 for DequantizeOp { self.blocksize, self.n, ) - .map_err(diffuse_rs_common::core::Error::wrap)?, + .map_err(diffusion_rs_common::core::Error::wrap)?, BnbQuantType::Fp4 => crate::metal_kernels::call_dequant_bnb_fp4( device.device(), &command_buffer, @@ -491,7 +491,7 @@ impl CustomOp3 for DequantizeOp { self.blocksize, self.n, ) - .map_err(diffuse_rs_common::core::Error::wrap)?, + .map_err(diffusion_rs_common::core::Error::wrap)?, BnbQuantType::Int8 => crate::metal_kernels::call_dequant_bnb_int8( device.device(), &command_buffer, @@ -507,10 +507,10 @@ impl CustomOp3 for DequantizeOp { self.blocksize, self.n, ) - .map_err(diffuse_rs_common::core::Error::wrap)?, + .map_err(diffusion_rs_common::core::Error::wrap)?, }; - let newstorage = diffuse_rs_common::core::MetalStorage::new( + let newstorage = diffusion_rs_common::core::MetalStorage::new( output, device.clone(), self.shape.elem_count(), @@ -574,7 +574,7 @@ impl Dequantize8BitOp { dev: &CudaDevice, kernel: unsafe extern "C" fn(*const i8, *const f32, *mut T, i32, i32, i32), ) -> Result<CudaSlice<T>> { - use diffuse_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr}; + use diffusion_rs_common::core::cuda::{cudarc::driver::DevicePtr, WrapErr}; let out = unsafe { dev.alloc::<T>(n as usize).w()? }; unsafe { @@ -600,19 +600,19 @@ impl CustomOp2 for Dequantize8BitOp { fn cpu_fwd( &self, weight_s: &CpuStorage, - weight_l: &diffuse_rs_common::core::Layout, + weight_l: &diffusion_rs_common::core::Layout, scb_s: &CpuStorage, - scb_l: &diffuse_rs_common::core::Layout, - ) -> diffuse_rs_common::core::Result<(CpuStorage, diffuse_rs_common::core::Shape)> { + scb_l: &diffusion_rs_common::core::Layout, + ) -> diffusion_rs_common::core::Result<(CpuStorage, diffusion_rs_common::core::Shape)> { if !(weight_l.is_contiguous() && scb_l.is_contiguous()) { - diffuse_rs_common::bail!("All inputs must be contiguous"); + diffusion_rs_common::bail!("All inputs must be contiguous"); } let row = weight_l.dim(0)?; let col = weight_l.dim(1)?; if row != scb_l.dim(0)?
{ - diffuse_rs_common::bail!("scb dim0 must match weight dim0"); + diffusion_rs_common::bail!("scb dim0 must match weight dim0"); } match (weight_s, scb_s, self.out_ty) { @@ -628,7 +628,7 @@ impl CustomOp2 for Dequantize8BitOp { CpuStorage::F32(self.dequantize_cpu(weight, scb, col)), weight_l.shape().clone(), )), - (w, s, t) => diffuse_rs_common::bail!( + (w, s, t) => diffusion_rs_common::bail!( "Unsupported dtypes for cpu dequant: {:?} weight, {:?} scb, {:?} out", w.dtype(), s.dtype(), @@ -640,13 +640,13 @@ impl CustomOp2 for Dequantize8BitOp { #[cfg(feature = "cuda")] fn cuda_fwd( &self, - weight_s: &diffuse_rs_common::core::CudaStorage, - weight_l: &diffuse_rs_common::core::Layout, - scb_s: &diffuse_rs_common::core::CudaStorage, - scb_l: &diffuse_rs_common::core::Layout, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + weight_s: &diffusion_rs_common::core::CudaStorage, + weight_l: &diffusion_rs_common::core::Layout, + scb_s: &diffusion_rs_common::core::CudaStorage, + scb_l: &diffusion_rs_common::core::Layout, + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { if !(weight_l.is_contiguous() && scb_l.is_contiguous()) { - diffuse_rs_common::bail!("All inputs must be contiguous"); + diffusion_rs_common::bail!("All inputs must be contiguous"); } let weight_slice = weight_s .as_cuda_slice::()? @@ -659,11 +659,11 @@ impl CustomOp2 for Dequantize8BitOp { let n = weight_l.shape().elem_count() as i32; if row != scb_l.dim(0)? as i32 { - diffuse_rs_common::bail!("scb dim0 must match weight dim0"); + diffusion_rs_common::bail!("scb dim0 must match weight dim0"); } let out = match self.out_ty { - DType::F32 => diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + DType::F32 => diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel( weight_slice, scb_slice, @@ -675,7 +675,7 @@ impl CustomOp2 for Dequantize8BitOp { )?, dev, ), - DType::F16 => diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + DType::F16 => diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel( weight_slice, scb_slice, @@ -687,7 +687,7 @@ impl CustomOp2 for Dequantize8BitOp { )?, dev, ), - DType::BF16 => diffuse_rs_common::core::CudaStorage::wrap_cuda_slice( + DType::BF16 => diffusion_rs_common::core::CudaStorage::wrap_cuda_slice( self.dispatch_cuda_kernel( weight_slice, scb_slice, @@ -699,7 +699,7 @@ impl CustomOp2 for Dequantize8BitOp { )?, dev, ), - _ => diffuse_rs_common::bail!("only f32/bf16/f16 are allowed in dequantize-8bit-op"), + _ => diffusion_rs_common::bail!("only f32/bf16/f16 are allowed in dequantize-8bit-op"), }; Ok((out, weight_l.shape().clone())) @@ -708,15 +708,15 @@ impl CustomOp2 for Dequantize8BitOp { #[cfg(feature = "metal")] fn metal_fwd( &self, - weight_s: &diffuse_rs_common::core::MetalStorage, - weight_l: &diffuse_rs_common::core::Layout, - scb_s: &diffuse_rs_common::core::MetalStorage, - scb_l: &diffuse_rs_common::core::Layout, - ) -> Result<(diffuse_rs_common::core::MetalStorage, Shape)> { - use diffuse_rs_common::core::DType; + weight_s: &diffusion_rs_common::core::MetalStorage, + weight_l: &diffusion_rs_common::core::Layout, + scb_s: &diffusion_rs_common::core::MetalStorage, + scb_l: &diffusion_rs_common::core::Layout, + ) -> Result<(diffusion_rs_common::core::MetalStorage, Shape)> { + use diffusion_rs_common::core::DType; if !(weight_l.is_contiguous() && scb_l.is_contiguous()) { - diffuse_rs_common::bail!("All inputs must be contiguous"); + diffusion_rs_common::bail!("All inputs must be contiguous"); } let 
command_buffer = weight_s.device().command_buffer()?; @@ -731,14 +731,14 @@ impl CustomOp2 for Dequantize8BitOp { let output = device.new_buffer(n, self.out_ty, "dequant-8bit-bnb")?; if weight_s.dtype() != DType::I8 { - diffuse_rs_common::bail!("input must be i8"); + diffusion_rs_common::bail!("input must be i8"); } if scb_s.dtype() != DType::F32 { - diffuse_rs_common::bail!("scb must be f32"); + diffusion_rs_common::bail!("scb must be f32"); } if row != scb_l.dim(0)? { - diffuse_rs_common::bail!("scb dim0 must match weight dim0"); + diffusion_rs_common::bail!("scb dim0 must match weight dim0"); } crate::metal_kernels::call_dequant_bnb_8bit( @@ -755,10 +755,10 @@ impl CustomOp2 for Dequantize8BitOp { col, n, ) - .map_err(diffuse_rs_common::core::Error::wrap)?; + .map_err(diffusion_rs_common::core::Error::wrap)?; let newstorage = - diffuse_rs_common::core::MetalStorage::new(output, device.clone(), n, self.out_ty); + diffusion_rs_common::core::MetalStorage::new(output, device.clone(), n, self.out_ty); Ok((newstorage, weight_l.shape().clone())) } } diff --git a/diffuse_rs_backend/src/cublaslt/api.rs b/diffusion_rs_backend/src/cublaslt/api.rs similarity index 70% rename from diffuse_rs_backend/src/cublaslt/api.rs rename to diffusion_rs_backend/src/cublaslt/api.rs index e8ef14c..4146ab3 100644 --- a/diffuse_rs_backend/src/cublaslt/api.rs +++ b/diffusion_rs_backend/src/cublaslt/api.rs @@ -1,10 +1,12 @@ -use diffuse_rs_common::core::cuda::cudarc::driver::DevicePtr; +use diffusion_rs_common::core::cuda::cudarc::driver::DevicePtr; use float8::F8E4M3; use std::ffi::c_int; -use diffuse_rs_common::core::backend::BackendStorage; -use diffuse_rs_common::core::cuda_backend::WrapErr; -use diffuse_rs_common::core::{CpuStorage, DType, Device, Layout, Result, Shape, Storage, Tensor}; +use diffusion_rs_common::core::backend::BackendStorage; +use diffusion_rs_common::core::cuda_backend::WrapErr; +use diffusion_rs_common::core::{ + CpuStorage, DType, Device, Layout, Result, Shape, Storage, Tensor, +}; use half::{bf16, f16}; use std::sync::Arc; @@ -18,7 +20,7 @@ impl CublasLt { pub fn new(device: &Device) -> Result { let dev = match device { Device::Cuda(d) => d, - _ => diffuse_rs_common::bail!("`device` must be a `cuda` device"), + _ => diffusion_rs_common::bail!("`device` must be a `cuda` device"), }; let inner = CudaBlasLT::new(dev.cuda_device()).unwrap(); @@ -38,13 +40,13 @@ pub struct CublasLTBatchMatmul { impl CublasLTBatchMatmul { pub fn fwd_f16( &self, - a: &diffuse_rs_common::core::CudaStorage, + a: &diffusion_rs_common::core::CudaStorage, a_l: &Layout, - b: &diffuse_rs_common::core::CudaStorage, + b: &diffusion_rs_common::core::CudaStorage, b_l: &Layout, - bias: Option<&diffuse_rs_common::core::CudaStorage>, + bias: Option<&diffusion_rs_common::core::CudaStorage>, bias_l: Option<&Layout>, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { let dev = a.device(); // Assume TN @@ -52,11 +54,11 @@ impl CublasLTBatchMatmul { let (b_0, n, b_2) = b_l.shape().dims3()?; if b_2 != k { - diffuse_rs_common::bail!("This layer only supports TN layout"); + diffusion_rs_common::bail!("This layer only supports TN layout"); } if b_0 != batch_size { - diffuse_rs_common::bail!("`b` must have the same batch size as `a`") + diffusion_rs_common::bail!("`b` must have the same batch size as `a`") } let lda = k; @@ -70,7 +72,7 @@ impl CublasLTBatchMatmul { let bias = if let (Some(bias), Some(bias_l)) = (bias, bias_l) { if bias_l.shape().dims1()? 
!= m { - diffuse_rs_common::bail!("Bias does not have the correct shape"); + diffusion_rs_common::bail!("Bias does not have the correct shape"); } Some(bias.as_cuda_slice::()?.slice(bias_l.start_offset()..)) @@ -82,25 +84,25 @@ impl CublasLTBatchMatmul { let (c, c_l) = c.storage_and_layout(); let c = match &*c { Storage::Cuda(storage) => storage.as_cuda_slice::()?, - _ => diffuse_rs_common::bail!("`c` must be a cuda tensor"), + _ => diffusion_rs_common::bail!("`c` must be a cuda tensor"), }; match c_l.contiguous_offsets() { Some((o1, o2)) => { if o1 != 0 { - diffuse_rs_common::bail!("`c` start offset must be 0"); + diffusion_rs_common::bail!("`c` start offset must be 0"); } if o2 != out_shape.elem_count() { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "`c` end offset must be {}", out_shape.elem_count() ) } } - None => diffuse_rs_common::bail!("`c` has to be contiguous"), + None => diffusion_rs_common::bail!("`c` has to be contiguous"), }; if c_l.shape().dims3()? != (batch_size, n, m) { - diffuse_rs_common::bail!("`c` does not have the correct shape"); + diffusion_rs_common::bail!("`c` does not have the correct shape"); } // Set beta to 0.0 if it is not set @@ -134,23 +136,23 @@ impl CublasLTBatchMatmul { unsafe { self.cublaslt .matmul(config, &a, &b, &mut out, bias.as_ref(), self.act.as_ref()) - .map_err(|e| diffuse_rs_common::core::Error::Cuda(Box::new(e)))?; + .map_err(|e| diffusion_rs_common::core::Error::Cuda(Box::new(e)))?; } - let out = diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone()); + let out = diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone()); Ok((out, out_shape)) } pub fn fwd_bf16( &self, - a: &diffuse_rs_common::core::CudaStorage, + a: &diffusion_rs_common::core::CudaStorage, a_l: &Layout, - b: &diffuse_rs_common::core::CudaStorage, + b: &diffusion_rs_common::core::CudaStorage, b_l: &Layout, - bias: Option<&diffuse_rs_common::core::CudaStorage>, + bias: Option<&diffusion_rs_common::core::CudaStorage>, bias_l: Option<&Layout>, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { let dev = a.device(); // Assume TN @@ -158,11 +160,11 @@ impl CublasLTBatchMatmul { let (b_0, n, b_2) = b_l.shape().dims3()?; if b_2 != k { - diffuse_rs_common::bail!("This layer only supports TN layout"); + diffusion_rs_common::bail!("This layer only supports TN layout"); } if b_0 != batch_size { - diffuse_rs_common::bail!("`b` must have the same batch size as `a`") + diffusion_rs_common::bail!("`b` must have the same batch size as `a`") } let lda = k; @@ -176,7 +178,7 @@ impl CublasLTBatchMatmul { let bias = if let (Some(bias), Some(bias_l)) = (bias, bias_l) { if bias_l.shape().dims1()? 
!= m { - diffuse_rs_common::bail!("Bias does not have the correct shape"); + diffusion_rs_common::bail!("Bias does not have the correct shape"); } Some(bias.as_cuda_slice::()?.slice(bias_l.start_offset()..)) @@ -188,25 +190,25 @@ impl CublasLTBatchMatmul { let (c, c_l) = c.storage_and_layout(); let c = match &*c { Storage::Cuda(storage) => storage.as_cuda_slice::()?, - _ => diffuse_rs_common::bail!("`c` must be a cuda tensor"), + _ => diffusion_rs_common::bail!("`c` must be a cuda tensor"), }; match c_l.contiguous_offsets() { Some((o1, o2)) => { if o1 != 0 { - diffuse_rs_common::bail!("`c` start offset must be 0"); + diffusion_rs_common::bail!("`c` start offset must be 0"); } if o2 != out_shape.elem_count() { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "`c` end offset must be {}", out_shape.elem_count() ) } } - None => diffuse_rs_common::bail!("`c` has to be contiguous"), + None => diffusion_rs_common::bail!("`c` has to be contiguous"), }; if c_l.shape().dims3()? != (batch_size, n, m) { - diffuse_rs_common::bail!("`c` does not have the correct shape"); + diffusion_rs_common::bail!("`c` does not have the correct shape"); } // Set beta to 0.0 if it is not set @@ -240,23 +242,23 @@ impl CublasLTBatchMatmul { unsafe { self.cublaslt .matmul(config, &a, &b, &mut out, bias.as_ref(), self.act.as_ref()) - .map_err(|e| diffuse_rs_common::core::Error::Cuda(Box::new(e)))?; + .map_err(|e| diffusion_rs_common::core::Error::Cuda(Box::new(e)))?; } - let out = diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone()); + let out = diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone()); Ok((out, out_shape)) } pub fn fwd_f32( &self, - a: &diffuse_rs_common::core::CudaStorage, + a: &diffusion_rs_common::core::CudaStorage, a_l: &Layout, - b: &diffuse_rs_common::core::CudaStorage, + b: &diffusion_rs_common::core::CudaStorage, b_l: &Layout, - bias: Option<&diffuse_rs_common::core::CudaStorage>, + bias: Option<&diffusion_rs_common::core::CudaStorage>, bias_l: Option<&Layout>, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { let dev = a.device(); // Assume TN @@ -264,11 +266,11 @@ impl CublasLTBatchMatmul { let (b_0, n, b_2) = b_l.shape().dims3()?; if b_2 != k { - diffuse_rs_common::bail!("This layer only supports TN layout"); + diffusion_rs_common::bail!("This layer only supports TN layout"); } if b_0 != batch_size { - diffuse_rs_common::bail!("`b` must have the same batch size as `a`") + diffusion_rs_common::bail!("`b` must have the same batch size as `a`") } let lda = k; @@ -282,7 +284,7 @@ impl CublasLTBatchMatmul { let bias = if let (Some(bias), Some(bias_l)) = (bias, bias_l) { if bias_l.shape().dims1()? 
!= m { - diffuse_rs_common::bail!("Bias does not have the correct shape"); + diffusion_rs_common::bail!("Bias does not have the correct shape"); } Some(bias.as_cuda_slice::()?.slice(bias_l.start_offset()..)) @@ -294,25 +296,25 @@ impl CublasLTBatchMatmul { let (c, c_l) = c.storage_and_layout(); let c = match &*c { Storage::Cuda(storage) => storage.as_cuda_slice::()?, - _ => diffuse_rs_common::bail!("`c` must be a cuda tensor"), + _ => diffusion_rs_common::bail!("`c` must be a cuda tensor"), }; match c_l.contiguous_offsets() { Some((o1, o2)) => { if o1 != 0 { - diffuse_rs_common::bail!("`c` start offset must be 0"); + diffusion_rs_common::bail!("`c` start offset must be 0"); } if o2 != out_shape.elem_count() { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "`c` end offset must be {}", out_shape.elem_count() ) } } - None => diffuse_rs_common::bail!("`c` has to be contiguous"), + None => diffusion_rs_common::bail!("`c` has to be contiguous"), }; if c_l.shape().dims3()? != (batch_size, n, m) { - diffuse_rs_common::bail!("`c` does not have the correct shape"); + diffusion_rs_common::bail!("`c` does not have the correct shape"); } // Set beta to 0.0 if it is not set @@ -346,16 +348,16 @@ impl CublasLTBatchMatmul { unsafe { self.cublaslt .matmul(config, &a, &b, &mut out, bias.as_ref(), self.act.as_ref()) - .map_err(|e| diffuse_rs_common::core::Error::Cuda(Box::new(e)))?; + .map_err(|e| diffusion_rs_common::core::Error::Cuda(Box::new(e)))?; } - let out = diffuse_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone()); + let out = diffusion_rs_common::core::CudaStorage::wrap_cuda_slice(out, dev.clone()); Ok((out, out_shape)) } } -impl diffuse_rs_common::core::CustomOp2 for CublasLTBatchMatmul { +impl diffusion_rs_common::core::CustomOp2 for CublasLTBatchMatmul { fn name(&self) -> &'static str { "cublaslt-batch-matmul" } @@ -367,22 +369,22 @@ impl diffuse_rs_common::core::CustomOp2 for CublasLTBatchMatmul { _: &CpuStorage, _: &Layout, ) -> Result<(CpuStorage, Shape)> { - diffuse_rs_common::bail!("no cpu support for cublaslt-batch-matmul") + diffusion_rs_common::bail!("no cpu support for cublaslt-batch-matmul") } fn cuda_fwd( &self, - a: &diffuse_rs_common::core::CudaStorage, + a: &diffusion_rs_common::core::CudaStorage, a_l: &Layout, - b: &diffuse_rs_common::core::CudaStorage, + b: &diffusion_rs_common::core::CudaStorage, b_l: &Layout, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { match a.dtype() { - diffuse_rs_common::core::DType::F16 => self.fwd_f16(a, a_l, b, b_l, None, None), - diffuse_rs_common::core::DType::BF16 => self.fwd_bf16(a, a_l, b, b_l, None, None), - diffuse_rs_common::core::DType::F32 => self.fwd_f32(a, a_l, b, b_l, None, None), + diffusion_rs_common::core::DType::F16 => self.fwd_f16(a, a_l, b, b_l, None, None), + diffusion_rs_common::core::DType::BF16 => self.fwd_bf16(a, a_l, b, b_l, None, None), + diffusion_rs_common::core::DType::F32 => self.fwd_f32(a, a_l, b, b_l, None, None), dt => { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "cublaslt-batch-matmul is only supported for f16/bf16/f32 ({dt:?})" ) } @@ -390,7 +392,7 @@ impl diffuse_rs_common::core::CustomOp2 for CublasLTBatchMatmul { } } -impl diffuse_rs_common::core::CustomOp3 for CublasLTBatchMatmul { +impl diffusion_rs_common::core::CustomOp3 for CublasLTBatchMatmul { fn name(&self) -> &'static str { "cublaslt-batch-matmul-add" } @@ -404,29 +406,29 @@ impl diffuse_rs_common::core::CustomOp3 for CublasLTBatchMatmul 
{ _: &CpuStorage, _: &Layout, ) -> Result<(CpuStorage, Shape)> { - diffuse_rs_common::bail!("no cpu support for cublaslt-batch-matmul-add") + diffusion_rs_common::bail!("no cpu support for cublaslt-batch-matmul-add") } fn cuda_fwd( &self, - a: &diffuse_rs_common::core::CudaStorage, + a: &diffusion_rs_common::core::CudaStorage, a_l: &Layout, - b: &diffuse_rs_common::core::CudaStorage, + b: &diffusion_rs_common::core::CudaStorage, b_l: &Layout, - bias: &diffuse_rs_common::core::CudaStorage, + bias: &diffusion_rs_common::core::CudaStorage, bias_l: &Layout, - ) -> Result<(diffuse_rs_common::core::CudaStorage, Shape)> { + ) -> Result<(diffusion_rs_common::core::CudaStorage, Shape)> { match a.dtype() { - diffuse_rs_common::core::DType::F16 => { + diffusion_rs_common::core::DType::F16 => { self.fwd_f16(a, a_l, b, b_l, Some(bias), Some(bias_l)) } - diffuse_rs_common::core::DType::BF16 => { + diffusion_rs_common::core::DType::BF16 => { self.fwd_bf16(a, a_l, b, b_l, Some(bias), Some(bias_l)) } - diffuse_rs_common::core::DType::F32 => { + diffusion_rs_common::core::DType::F32 => { self.fwd_f32(a, a_l, b, b_l, Some(bias), Some(bias_l)) } - dt => diffuse_rs_common::bail!( + dt => diffusion_rs_common::bail!( "cublaslt-batch-matmul-add is only supported for f16/bf16/f32 ({dt:?})" ), } diff --git a/diffuse_rs_backend/src/cublaslt/matmul.rs b/diffusion_rs_backend/src/cublaslt/matmul.rs similarity index 98% rename from diffuse_rs_backend/src/cublaslt/matmul.rs rename to diffusion_rs_backend/src/cublaslt/matmul.rs index afd24c3..b060156 100644 --- a/diffuse_rs_backend/src/cublaslt/matmul.rs +++ b/diffusion_rs_backend/src/cublaslt/matmul.rs @@ -1,11 +1,11 @@ use core::ffi::c_int; use core::mem; -use diffuse_rs_common::core::cuda::cudarc::cublaslt::result::set_matrix_layout_attribute; -use diffuse_rs_common::core::cuda::cudarc::cublaslt::{result, result::CublasError, sys}; -use diffuse_rs_common::core::cuda::cudarc::driver::sys::{ +use diffusion_rs_common::core::cuda::cudarc::cublaslt::result::set_matrix_layout_attribute; +use diffusion_rs_common::core::cuda::cudarc::cublaslt::{result, result::CublasError, sys}; +use diffusion_rs_common::core::cuda::cudarc::driver::sys::{ CUdevice_attribute, CUdeviceptr, CUstream, }; -use diffuse_rs_common::core::cuda::cudarc::driver::{ +use diffusion_rs_common::core::cuda::cudarc::driver::{ CudaDevice, CudaSlice, DevicePtr, DevicePtrMut, DriverError, }; use float8::F8E4M3; diff --git a/diffuse_rs_backend/src/cublaslt/mod.rs b/diffusion_rs_backend/src/cublaslt/mod.rs similarity index 91% rename from diffuse_rs_backend/src/cublaslt/mod.rs rename to diffusion_rs_backend/src/cublaslt/mod.rs index 3faaf29..7816d6a 100644 --- a/diffuse_rs_backend/src/cublaslt/mod.rs +++ b/diffusion_rs_backend/src/cublaslt/mod.rs @@ -2,8 +2,8 @@ #![allow(unused_variables, unused_imports, dead_code)] -use diffuse_rs_common::core::{Device, Result, Tensor}; -use diffuse_rs_common::nn::Activation as CandleActivation; +use diffusion_rs_common::core::{Device, Result, Tensor}; +use diffusion_rs_common::nn::Activation as CandleActivation; use once_cell::sync::Lazy; use std::sync::{Mutex, Once}; @@ -38,7 +38,7 @@ pub fn maybe_init_cublas_lt_wrapper() { // Check if we can call the driver // Then check if we can create a device // Then check that the device is CUDA - use diffuse_rs_common::core::cuda_backend::cudarc::driver; + use diffusion_rs_common::core::cuda_backend::cudarc::driver; CUBLASLT = driver::result::init() .ok() .and_then(|_| Device::cuda_if_available(0).ok()) @@ -107,13 +107,13 @@ impl 
CublasLtWrapper { )?; if Some(CandleActivation::Swiglu) == act { - result = diffuse_rs_common::nn::ops::swiglu(&result)?; + result = diffusion_rs_common::nn::ops::swiglu(&result)?; } Ok(result) } #[cfg(not(feature = "cuda"))] { - diffuse_rs_common::bail!("`cuda` feature is not enabled") + diffusion_rs_common::bail!("`cuda` feature is not enabled") } } } diff --git a/diffuse_rs_backend/src/gguf/mod.rs b/diffusion_rs_backend/src/gguf/mod.rs similarity index 93% rename from diffuse_rs_backend/src/gguf/mod.rs rename to diffusion_rs_backend/src/gguf/mod.rs index ecdeb39..9cdded5 100644 --- a/diffuse_rs_backend/src/gguf/mod.rs +++ b/diffusion_rs_backend/src/gguf/mod.rs @@ -1,8 +1,8 @@ use std::sync::Arc; -use diffuse_rs_common::core::Device; -use diffuse_rs_common::core::{quantized::QMatMul, DType, Result, Tensor}; -use diffuse_rs_common::nn::Module; +use diffusion_rs_common::core::Device; +use diffusion_rs_common::core::{quantized::QMatMul, DType, Result, Tensor}; +use diffusion_rs_common::nn::Module; use crate::{QuantMethod, QuantMethodConfig}; diff --git a/diffuse_rs_backend/src/lib.rs b/diffusion_rs_backend/src/lib.rs similarity index 94% rename from diffuse_rs_backend/src/lib.rs rename to diffusion_rs_backend/src/lib.rs index 603c3b0..69182da 100644 --- a/diffuse_rs_backend/src/lib.rs +++ b/diffusion_rs_backend/src/lib.rs @@ -3,7 +3,7 @@ use std::{ sync::Arc, }; -use diffuse_rs_common::core::{ +use diffusion_rs_common::core::{ quantized::{GgmlDType, QTensor}, DType, Device, Result, Tensor, }; @@ -21,8 +21,8 @@ pub use bitsandbytes::{BnbLinear, BnbQuantParmas, BnbQuantType}; pub use gguf::GgufMatMul; pub use unquantized::UnquantLinear; -use diffuse_rs_common::nn::{Linear, Module}; -use diffuse_rs_common::VarBuilder; +use diffusion_rs_common::nn::{Linear, Module}; +use diffusion_rs_common::VarBuilder; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Deserialize, Serialize, Default)] @@ -108,7 +108,7 @@ pub enum IsqType { } impl TryFrom for GgmlDType { - type Error = diffuse_rs_common::core::Error; + type Error = diffusion_rs_common::core::Error; fn try_from(value: IsqType) -> Result { let tp = match value { @@ -124,7 +124,7 @@ impl TryFrom for GgmlDType { IsqType::Q8K => Self::Q8K, IsqType::Q8_0 => Self::Q8_0, IsqType::Q8_1 => Self::Q8_1, - _ => diffuse_rs_common::bail!("Expected valid GGML ISQ type."), + _ => diffusion_rs_common::bail!("Expected valid GGML ISQ type."), }; #[cfg(feature = "cuda")] { @@ -141,7 +141,7 @@ impl TryFrom for GgmlDType { | GgmlDType::Q5K | GgmlDType::Q6K ) { - diffuse_rs_common::bail!("GGML ISQ type on CUDA must be one of `Q4_0`, `Q4_1`, `Q5_0`, `Q5_1`, `Q8_0`, `Q2K`, `Q3K`, `Q4K`, `Q5K`, `Q6K`, `HQQ8`, `HQQ4`") + diffusion_rs_common::bail!("GGML ISQ type on CUDA must be one of `Q4_0`, `Q4_1`, `Q5_0`, `Q5_1`, `Q8_0`, `Q2K`, `Q3K`, `Q4K`, `Q5K`, `Q6K`, `HQQ8`, `HQQ4`") } } Ok(tp) diff --git a/diffuse_rs_backend/src/metal_kernels/bnb_dequantize.metal b/diffusion_rs_backend/src/metal_kernels/bnb_dequantize.metal similarity index 100% rename from diffuse_rs_backend/src/metal_kernels/bnb_dequantize.metal rename to diffusion_rs_backend/src/metal_kernels/bnb_dequantize.metal diff --git a/diffuse_rs_backend/src/metal_kernels/mod.rs b/diffusion_rs_backend/src/metal_kernels/mod.rs similarity index 99% rename from diffuse_rs_backend/src/metal_kernels/mod.rs rename to diffusion_rs_backend/src/metal_kernels/mod.rs index 8b416af..2049f4d 100644 --- a/diffuse_rs_backend/src/metal_kernels/mod.rs +++ b/diffusion_rs_backend/src/metal_kernels/mod.rs @@ -1,4 +1,4 @@ -use 
diffuse_rs_common::core::DType; +use diffusion_rs_common::core::DType; use metal::{ Buffer, CompileOptions, ComputeCommandEncoderRef, ComputePipelineState, Device, Function, FunctionConstantValues, Library, MTLSize, diff --git a/diffuse_rs_backend/src/metal_kernels/sdpa.metal b/diffusion_rs_backend/src/metal_kernels/sdpa.metal similarity index 100% rename from diffuse_rs_backend/src/metal_kernels/sdpa.metal rename to diffusion_rs_backend/src/metal_kernels/sdpa.metal diff --git a/diffuse_rs_backend/src/metal_kernels/utils.rs b/diffusion_rs_backend/src/metal_kernels/utils.rs similarity index 100% rename from diffuse_rs_backend/src/metal_kernels/utils.rs rename to diffusion_rs_backend/src/metal_kernels/utils.rs diff --git a/diffuse_rs_backend/src/ops.rs b/diffusion_rs_backend/src/ops.rs similarity index 84% rename from diffuse_rs_backend/src/ops.rs rename to diffusion_rs_backend/src/ops.rs index 2012e41..53c070a 100644 --- a/diffuse_rs_backend/src/ops.rs +++ b/diffusion_rs_backend/src/ops.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{CpuStorage, Layout, Result, Shape, Tensor}; +use diffusion_rs_common::core::{CpuStorage, Layout, Result, Shape, Tensor}; #[allow(dead_code)] struct Sdpa { @@ -6,7 +6,7 @@ struct Sdpa { softcapping: f32, } -impl diffuse_rs_common::core::CustomOp3 for Sdpa { +impl diffusion_rs_common::core::CustomOp3 for Sdpa { fn name(&self) -> &'static str { "metal-sdpa" } @@ -20,21 +20,21 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { _s3: &CpuStorage, _l3: &Layout, ) -> Result<(CpuStorage, Shape)> { - diffuse_rs_common::bail!("SDPA has no cpu impl") + diffusion_rs_common::bail!("SDPA has no cpu impl") } #[cfg(feature = "metal")] fn metal_fwd( &self, - q: &diffuse_rs_common::core::MetalStorage, + q: &diffusion_rs_common::core::MetalStorage, q_l: &Layout, - k: &diffuse_rs_common::core::MetalStorage, + k: &diffusion_rs_common::core::MetalStorage, k_l: &Layout, - v: &diffuse_rs_common::core::MetalStorage, + v: &diffusion_rs_common::core::MetalStorage, v_l: &Layout, - ) -> Result<(diffuse_rs_common::core::MetalStorage, Shape)> { + ) -> Result<(diffusion_rs_common::core::MetalStorage, Shape)> { use crate::metal_kernels::SdpaDType; - use diffuse_rs_common::core::{backend::BackendStorage, DType, Shape, D}; + use diffusion_rs_common::core::{backend::BackendStorage, DType, Shape, D}; let device = q.device(); @@ -45,17 +45,17 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { // q,k must have matching emb dim if q_l.dim(D::Minus1)? != k_l.dim(D::Minus1)? { - diffuse_rs_common::bail!("`q` and `k` last dims must match"); + diffusion_rs_common::bail!("`q` and `k` last dims must match"); } // k,v must have matching n kv heads if v_l.dim(D::Minus(3))? != k_l.dim(D::Minus(3))? { - diffuse_rs_common::bail!("`k` and `v` head dims must match"); + diffusion_rs_common::bail!("`k` and `v` head dims must match"); } // n_heads % n_kv_heads == 0; n_heads >= 1, n_kv_heads >= 1. if q_l.dim(D::Minus(3))? % k_l.dim(D::Minus(3))? 
!= 0 { - diffuse_rs_common::bail!("query `n_heads` must be a multiple of `n_kv_heads`"); + diffusion_rs_common::bail!("query `n_heads` must be a multiple of `n_kv_heads`"); } let k_head = k_l.dim(D::Minus1)?; @@ -75,7 +75,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { implementation_supports_use_case &= supports_sdpa_full || supports_sdpa_vector; if !supported_head_dim { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "Metal SDPA does not support q head dim {q_head}: q dims {:?}, k dims {:?}, v dims {:?}.", q_l.dims(), k_l.dims(), @@ -83,7 +83,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { ); } if !implementation_supports_use_case { - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "Metal SDPA does not support q dims {:?}, k dims {:?}, v dims {:?}.", q_l.dims(), k_l.dims(), @@ -93,7 +93,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { for t in [k.dtype(), v.dtype()] { if q.dtype() != t { - diffuse_rs_common::bail!("all q, k, v dtypes must match."); + diffusion_rs_common::bail!("all q, k, v dtypes must match."); } } @@ -101,7 +101,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { DType::BF16 => SdpaDType::BF16, DType::F16 => SdpaDType::F16, DType::F32 => SdpaDType::F32, - other => diffuse_rs_common::bail!("unsupported sdpa type {other:?}"), + other => diffusion_rs_common::bail!("unsupported sdpa type {other:?}"), }; let command_buffer = q.device().command_buffer()?; @@ -156,7 +156,7 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { self.softcapping, itype, ) - .map_err(diffuse_rs_common::core::Error::wrap)?; + .map_err(diffusion_rs_common::core::Error::wrap)?; } else { command_buffer.set_label("vector_attention"); crate::metal_kernels::call_sdpa_vector( @@ -178,11 +178,11 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { self.softcapping, itype, ) - .map_err(diffuse_rs_common::core::Error::wrap)?; + .map_err(diffusion_rs_common::core::Error::wrap)?; } } else if supports_sdpa_full { if q_l.dim(2)? != k_l.dim(2)?
{ - diffuse_rs_common::bail!( + diffusion_rs_common::bail!( "query and key sequence length must be equal if using full metal sdpa" ) } @@ -204,12 +204,12 @@ impl diffuse_rs_common::core::CustomOp3 for Sdpa { self.softcapping, itype, ) - .map_err(diffuse_rs_common::core::Error::wrap)?; + .map_err(diffusion_rs_common::core::Error::wrap)?; } else { - diffuse_rs_common::bail!("must be vector or full sdpa kernel"); + diffusion_rs_common::bail!("must be vector or full sdpa kernel"); } - let newstorage = diffuse_rs_common::core::MetalStorage::new( + let newstorage = diffusion_rs_common::core::MetalStorage::new( output, device.clone(), elem_count, @@ -256,7 +256,7 @@ pub fn sdpa(q: &Tensor, k: &Tensor, v: &Tensor, scale: f32, softcapping: f32) -> att = (att * softcapping as f64)?; } - att = diffuse_rs_common::nn::ops::softmax_last_dim(&att)?; + att = diffusion_rs_common::nn::ops::softmax_last_dim(&att)?; att.matmul(v) } } diff --git a/diffuse_rs_backend/src/unquantized/mod.rs b/diffusion_rs_backend/src/unquantized/mod.rs similarity index 94% rename from diffuse_rs_backend/src/unquantized/mod.rs rename to diffusion_rs_backend/src/unquantized/mod.rs index fe42448..acef39b 100644 --- a/diffuse_rs_backend/src/unquantized/mod.rs +++ b/diffusion_rs_backend/src/unquantized/mod.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use diffuse_rs_common::core::{DType, Device, DeviceLocation, Result, Shape, Tensor, D}; +use diffusion_rs_common::core::{DType, Device, DeviceLocation, Result, Shape, Tensor, D}; use crate::{ cublaslt::{maybe_init_cublas_lt_wrapper, CUBLASLT_HANDLE}, @@ -14,7 +14,7 @@ pub struct UnquantLinear { } impl QuantMethod for UnquantLinear { - fn new(method: QuantMethodConfig) -> diffuse_rs_common::core::Result + fn new(method: QuantMethodConfig) -> diffusion_rs_common::core::Result where Self: Sized, { diff --git a/diffuse_rs_cli/Cargo.toml b/diffusion_rs_cli/Cargo.toml similarity index 57% rename from diffuse_rs_cli/Cargo.toml rename to diffusion_rs_cli/Cargo.toml index 3117ab5..a44a244 100644 --- a/diffuse_rs_cli/Cargo.toml +++ b/diffusion_rs_cli/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "diffuse_rs_cli" +name = "diffusion_rs_cli" readme = "README.md" authors.workspace = true version.workspace = true edition.workspace = true -description = "CLI for diffuse_rs" +description = "CLI for diffusion_rs" repository.workspace = true keywords.workspace = true categories.workspace = true @@ -12,7 +12,7 @@ license.workspace = true homepage.workspace = true [dependencies] -diffuse_rs_core = { path = "../diffuse_rs_core" } +diffusion_rs_core = { path = "../diffusion_rs_core" } clap.workspace = true anyhow.workspace = true tracing.workspace = true @@ -20,8 +20,8 @@ tracing-subscriber.workspace = true cliclack.workspace = true [features] -cuda = ["diffuse_rs_core/cuda"] -cudnn = ["diffuse_rs_core/cudnn"] -metal = ["diffuse_rs_core/metal"] -accelerate = ["diffuse_rs_core/accelerate"] -mkl = ["diffuse_rs_core/mkl"] +cuda = ["diffusion_rs_core/cuda"] +cudnn = ["diffusion_rs_core/cudnn"] +metal = ["diffusion_rs_core/metal"] +accelerate = ["diffusion_rs_core/accelerate"] +mkl = ["diffusion_rs_core/mkl"] diff --git a/diffusion_rs_cli/README.md b/diffusion_rs_cli/README.md new file mode 100644 index 0000000..44b3b31 --- /dev/null +++ b/diffusion_rs_cli/README.md @@ -0,0 +1,22 @@ +# `diffusion_rs_cli` + +CLI for diffusion-rs. 
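The CLI is a thin wrapper over `diffusion_rs_core`. Going only by the imports visible in the `main.rs` hunk further down (`Pipeline`, `ModelSource`, `TokenSource`, `DiffusionGenerationParams`), the equivalent library usage looks roughly like the sketch below; the `load`/`forward` argument lists and parameter values are assumptions, not taken from this patch:

```rust
use diffusion_rs_core::{DiffusionGenerationParams, ModelSource, Pipeline, TokenSource};

fn main() -> anyhow::Result<()> {
    // Mirrors `diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f ...`.
    // The exact `Pipeline::load` signature is assumed here.
    let pipeline = Pipeline::load(
        ModelSource::dduf("FLUX.1-dev-Q4-bnb.dduf")?,
        false,                   // quiet loading (assumed flag)
        TokenSource::CacheToken, // HF token from the local cache
        None,
        None,
    )?;
    let images = pipeline.forward(
        vec!["A picture of a sunrise.".to_string()],
        DiffusionGenerationParams {
            height: 720,
            width: 1280,
            num_steps: 50,
            guidance_scale: 3.5,
        },
    )?;
    images[0].save("image.png")?;
    Ok(())
}
```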
+ +## Examples +- FLUX dev: +``` +diffusion_rs_cli --scale 3.5 --num-steps 50 dduf -f FLUX.1-dev-Q4-bnb.dduf +``` + +``` +diffusion_rs_cli --scale 3.5 --num-steps 50 model-id -m black-forest-labs/FLUX.1-dev +``` + +- FLUX schnell: +``` +diffusion_rs_cli --scale 0.0 --num-steps 4 dduf -f FLUX.1-schnell-Q8-bnb.dduf +``` + +``` +diffusion_rs_cli --scale 0.0 --num-steps 4 model-id -m black-forest-labs/FLUX.1-schnell +``` \ No newline at end of file diff --git a/diffuse_rs_cli/src/main.rs b/diffusion_rs_cli/src/main.rs similarity index 97% rename from diffuse_rs_cli/src/main.rs rename to diffusion_rs_cli/src/main.rs index 432fe13..9b0cb7d 100644 --- a/diffuse_rs_cli/src/main.rs +++ b/diffusion_rs_cli/src/main.rs @@ -2,7 +2,9 @@ use cliclack::input; use std::{path::PathBuf, time::Instant}; use clap::{Parser, Subcommand}; -use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; +use diffusion_rs_core::{ + DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource, +}; use tracing::level_filters::LevelFilter; use tracing_subscriber::EnvFilter; diff --git a/diffuse_rs_common/Cargo.toml b/diffusion_rs_common/Cargo.toml similarity index 98% rename from diffuse_rs_common/Cargo.toml rename to diffusion_rs_common/Cargo.toml index 7a3cd87..f774108 100644 --- a/diffuse_rs_common/Cargo.toml +++ b/diffusion_rs_common/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "diffuse_rs_common" +name = "diffusion_rs_common" readme = "README.md" authors.workspace = true version.workspace = true diff --git a/diffusion_rs_common/README.md b/diffusion_rs_common/README.md new file mode 100644 index 0000000..7649a17 --- /dev/null +++ b/diffusion_rs_common/README.md @@ -0,0 +1,3 @@ +# `diffusion_rs_common` + +Common functionality for diffusion-rs, including a core ML framework based on Candle, NN functionality, and DDUF loading.
\ No newline at end of file diff --git a/diffuse_rs_common/build.rs b/diffusion_rs_common/build.rs similarity index 100% rename from diffuse_rs_common/build.rs rename to diffusion_rs_common/build.rs diff --git a/diffuse_rs_common/src/core/LICENSE b/diffusion_rs_common/src/core/LICENSE similarity index 100% rename from diffuse_rs_common/src/core/LICENSE rename to diffusion_rs_common/src/core/LICENSE diff --git a/diffuse_rs_common/src/core/accelerate.rs b/diffusion_rs_common/src/core/accelerate.rs similarity index 100% rename from diffuse_rs_common/src/core/accelerate.rs rename to diffusion_rs_common/src/core/accelerate.rs diff --git a/diffuse_rs_common/src/core/backend.rs b/diffusion_rs_common/src/core/backend.rs similarity index 100% rename from diffuse_rs_common/src/core/backend.rs rename to diffusion_rs_common/src/core/backend.rs diff --git a/diffuse_rs_common/src/core/backprop.rs b/diffusion_rs_common/src/core/backprop.rs similarity index 100% rename from diffuse_rs_common/src/core/backprop.rs rename to diffusion_rs_common/src/core/backprop.rs diff --git a/diffuse_rs_common/src/core/conv.rs b/diffusion_rs_common/src/core/conv.rs similarity index 100% rename from diffuse_rs_common/src/core/conv.rs rename to diffusion_rs_common/src/core/conv.rs diff --git a/diffuse_rs_common/src/core/convert.rs b/diffusion_rs_common/src/core/convert.rs similarity index 100% rename from diffuse_rs_common/src/core/convert.rs rename to diffusion_rs_common/src/core/convert.rs diff --git a/diffuse_rs_common/src/core/cpu/avx.rs b/diffusion_rs_common/src/core/cpu/avx.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu/avx.rs rename to diffusion_rs_common/src/core/cpu/avx.rs diff --git a/diffuse_rs_common/src/core/cpu/erf.rs b/diffusion_rs_common/src/core/cpu/erf.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu/erf.rs rename to diffusion_rs_common/src/core/cpu/erf.rs diff --git a/diffuse_rs_common/src/core/cpu/kernels.rs b/diffusion_rs_common/src/core/cpu/kernels.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu/kernels.rs rename to diffusion_rs_common/src/core/cpu/kernels.rs diff --git a/diffuse_rs_common/src/core/cpu/mod.rs b/diffusion_rs_common/src/core/cpu/mod.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu/mod.rs rename to diffusion_rs_common/src/core/cpu/mod.rs diff --git a/diffuse_rs_common/src/core/cpu/neon.rs b/diffusion_rs_common/src/core/cpu/neon.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu/neon.rs rename to diffusion_rs_common/src/core/cpu/neon.rs diff --git a/diffuse_rs_common/src/core/cpu/simd128.rs b/diffusion_rs_common/src/core/cpu/simd128.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu/simd128.rs rename to diffusion_rs_common/src/core/cpu/simd128.rs diff --git a/diffuse_rs_common/src/core/cpu_backend/mod.rs b/diffusion_rs_common/src/core/cpu_backend/mod.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu_backend/mod.rs rename to diffusion_rs_common/src/core/cpu_backend/mod.rs diff --git a/diffuse_rs_common/src/core/cpu_backend/utils.rs b/diffusion_rs_common/src/core/cpu_backend/utils.rs similarity index 100% rename from diffuse_rs_common/src/core/cpu_backend/utils.rs rename to diffusion_rs_common/src/core/cpu_backend/utils.rs diff --git a/diffuse_rs_common/src/core/cuda_backend/cudnn.rs b/diffusion_rs_common/src/core/cuda_backend/cudnn.rs similarity index 100% rename from diffuse_rs_common/src/core/cuda_backend/cudnn.rs rename to 
diffusion_rs_common/src/core/cuda_backend/cudnn.rs diff --git a/diffuse_rs_common/src/core/cuda_backend/device.rs b/diffusion_rs_common/src/core/cuda_backend/device.rs similarity index 100% rename from diffuse_rs_common/src/core/cuda_backend/device.rs rename to diffusion_rs_common/src/core/cuda_backend/device.rs diff --git a/diffuse_rs_common/src/core/cuda_backend/error.rs b/diffusion_rs_common/src/core/cuda_backend/error.rs similarity index 100% rename from diffuse_rs_common/src/core/cuda_backend/error.rs rename to diffusion_rs_common/src/core/cuda_backend/error.rs diff --git a/diffuse_rs_common/src/core/cuda_backend/mod.rs b/diffusion_rs_common/src/core/cuda_backend/mod.rs similarity index 100% rename from diffuse_rs_common/src/core/cuda_backend/mod.rs rename to diffusion_rs_common/src/core/cuda_backend/mod.rs diff --git a/diffuse_rs_common/src/core/cuda_backend/utils.rs b/diffusion_rs_common/src/core/cuda_backend/utils.rs similarity index 100% rename from diffuse_rs_common/src/core/cuda_backend/utils.rs rename to diffusion_rs_common/src/core/cuda_backend/utils.rs diff --git a/diffuse_rs_common/src/core/custom_op.rs b/diffusion_rs_common/src/core/custom_op.rs similarity index 100% rename from diffuse_rs_common/src/core/custom_op.rs rename to diffusion_rs_common/src/core/custom_op.rs diff --git a/diffuse_rs_common/src/core/device.rs b/diffusion_rs_common/src/core/device.rs similarity index 100% rename from diffuse_rs_common/src/core/device.rs rename to diffusion_rs_common/src/core/device.rs diff --git a/diffuse_rs_common/src/core/display.rs b/diffusion_rs_common/src/core/display.rs similarity index 100% rename from diffuse_rs_common/src/core/display.rs rename to diffusion_rs_common/src/core/display.rs diff --git a/diffuse_rs_common/src/core/dtype.rs b/diffusion_rs_common/src/core/dtype.rs similarity index 100% rename from diffuse_rs_common/src/core/dtype.rs rename to diffusion_rs_common/src/core/dtype.rs diff --git a/diffuse_rs_common/src/core/dummy_cuda_backend.rs b/diffusion_rs_common/src/core/dummy_cuda_backend.rs similarity index 100% rename from diffuse_rs_common/src/core/dummy_cuda_backend.rs rename to diffusion_rs_common/src/core/dummy_cuda_backend.rs diff --git a/diffuse_rs_common/src/core/dummy_metal_backend.rs b/diffusion_rs_common/src/core/dummy_metal_backend.rs similarity index 100% rename from diffuse_rs_common/src/core/dummy_metal_backend.rs rename to diffusion_rs_common/src/core/dummy_metal_backend.rs diff --git a/diffuse_rs_common/src/core/error.rs b/diffusion_rs_common/src/core/error.rs similarity index 100% rename from diffuse_rs_common/src/core/error.rs rename to diffusion_rs_common/src/core/error.rs diff --git a/diffuse_rs_common/src/core/indexer.rs b/diffusion_rs_common/src/core/indexer.rs similarity index 93% rename from diffuse_rs_common/src/core/indexer.rs rename to diffusion_rs_common/src/core/indexer.rs index 3215b42..b4ee963 100644 --- a/diffuse_rs_common/src/core/indexer.rs +++ b/diffusion_rs_common/src/core/indexer.rs @@ -7,7 +7,7 @@ impl Tensor { /// Intended to be use by the trait `.i()` /// /// ``` - /// # use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp}; + /// # use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// /// let c = a.i(0..1)?; @@ -22,7 +22,7 @@ impl Tensor { /// let c = a.i((.., ..=2))?; /// assert_eq!(c.shape().dims(), &[2, 3]); /// - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` fn 
index(&self, indexers: &[TensorIndexer]) -> Result { let mut x = self.clone(); @@ -142,7 +142,7 @@ where T: Into, { ///```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp}; + /// use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp}; /// let a = Tensor::new(&[ /// [0., 1.], /// [2., 3.], @@ -166,7 +166,7 @@ where /// [2., 3.], /// [4., 5.] /// ]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` fn i(&self, index: T) -> Result { self.index(&[index.into()]) @@ -178,7 +178,7 @@ where A: Into, { ///```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp}; + /// use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp}; /// let a = Tensor::new(&[ /// [0f32, 1.], /// [2. , 3.], @@ -202,7 +202,7 @@ where /// [2., 3.], /// [4., 5.] /// ]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` fn i(&self, (a,): (A,)) -> Result { self.index(&[a.into()]) @@ -215,7 +215,7 @@ where B: Into, { ///```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device, IndexOp}; + /// use diffusion_rs_common::core::{Tensor, DType, Device, IndexOp}; /// let a = Tensor::new(&[[0f32, 1., 2.], [3., 4., 5.], [6., 7., 8.]], &Device::Cpu)?; /// /// let b = a.i((1, 0))?; @@ -228,7 +228,7 @@ where /// let d = a.i((2.., ..))?; /// assert_eq!(c.shape().dims(), &[2]); /// assert_eq!(c.to_vec1::()?, &[1., 4.]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` fn i(&self, (a, b): (A, B)) -> Result { self.index(&[a.into(), b.into()]) diff --git a/diffuse_rs_common/src/core/layout.rs b/diffusion_rs_common/src/core/layout.rs similarity index 100% rename from diffuse_rs_common/src/core/layout.rs rename to diffusion_rs_common/src/core/layout.rs diff --git a/diffuse_rs_common/src/core/metal_backend/device.rs b/diffusion_rs_common/src/core/metal_backend/device.rs similarity index 100% rename from diffuse_rs_common/src/core/metal_backend/device.rs rename to diffusion_rs_common/src/core/metal_backend/device.rs diff --git a/diffuse_rs_common/src/core/metal_backend/mod.rs b/diffusion_rs_common/src/core/metal_backend/mod.rs similarity index 100% rename from diffuse_rs_common/src/core/metal_backend/mod.rs rename to diffusion_rs_common/src/core/metal_backend/mod.rs diff --git a/diffuse_rs_common/src/core/mkl.rs b/diffusion_rs_common/src/core/mkl.rs similarity index 100% rename from diffuse_rs_common/src/core/mkl.rs rename to diffusion_rs_common/src/core/mkl.rs diff --git a/diffuse_rs_common/src/core/mod.rs b/diffusion_rs_common/src/core/mod.rs similarity index 100% rename from diffuse_rs_common/src/core/mod.rs rename to diffusion_rs_common/src/core/mod.rs diff --git a/diffuse_rs_common/src/core/npy.rs b/diffusion_rs_common/src/core/npy.rs similarity index 100% rename from diffuse_rs_common/src/core/npy.rs rename to diffusion_rs_common/src/core/npy.rs diff --git a/diffuse_rs_common/src/core/op.rs b/diffusion_rs_common/src/core/op.rs similarity index 100% rename from diffuse_rs_common/src/core/op.rs rename to diffusion_rs_common/src/core/op.rs diff --git a/diffuse_rs_common/src/core/pickle.rs b/diffusion_rs_common/src/core/pickle.rs similarity index 100% rename from diffuse_rs_common/src/core/pickle.rs rename to diffusion_rs_common/src/core/pickle.rs diff --git a/diffuse_rs_common/src/core/quantized/avx.rs b/diffusion_rs_common/src/core/quantized/avx.rs similarity index 100% rename from 
diffuse_rs_common/src/core/quantized/avx.rs rename to diffusion_rs_common/src/core/quantized/avx.rs diff --git a/diffuse_rs_common/src/core/quantized/cuda.rs b/diffusion_rs_common/src/core/quantized/cuda.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/cuda.rs rename to diffusion_rs_common/src/core/quantized/cuda.rs diff --git a/diffuse_rs_common/src/core/quantized/dummy_cuda.rs b/diffusion_rs_common/src/core/quantized/dummy_cuda.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/dummy_cuda.rs rename to diffusion_rs_common/src/core/quantized/dummy_cuda.rs diff --git a/diffuse_rs_common/src/core/quantized/dummy_metal.rs b/diffusion_rs_common/src/core/quantized/dummy_metal.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/dummy_metal.rs rename to diffusion_rs_common/src/core/quantized/dummy_metal.rs diff --git a/diffuse_rs_common/src/core/quantized/ggml_file.rs b/diffusion_rs_common/src/core/quantized/ggml_file.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/ggml_file.rs rename to diffusion_rs_common/src/core/quantized/ggml_file.rs diff --git a/diffuse_rs_common/src/core/quantized/gguf_file.rs b/diffusion_rs_common/src/core/quantized/gguf_file.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/gguf_file.rs rename to diffusion_rs_common/src/core/quantized/gguf_file.rs diff --git a/diffuse_rs_common/src/core/quantized/imatrix_file.rs b/diffusion_rs_common/src/core/quantized/imatrix_file.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/imatrix_file.rs rename to diffusion_rs_common/src/core/quantized/imatrix_file.rs diff --git a/diffuse_rs_common/src/core/quantized/k_quants.rs b/diffusion_rs_common/src/core/quantized/k_quants.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/k_quants.rs rename to diffusion_rs_common/src/core/quantized/k_quants.rs diff --git a/diffuse_rs_common/src/core/quantized/metal.rs b/diffusion_rs_common/src/core/quantized/metal.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/metal.rs rename to diffusion_rs_common/src/core/quantized/metal.rs diff --git a/diffuse_rs_common/src/core/quantized/mod.rs b/diffusion_rs_common/src/core/quantized/mod.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/mod.rs rename to diffusion_rs_common/src/core/quantized/mod.rs diff --git a/diffuse_rs_common/src/core/quantized/neon.rs b/diffusion_rs_common/src/core/quantized/neon.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/neon.rs rename to diffusion_rs_common/src/core/quantized/neon.rs diff --git a/diffuse_rs_common/src/core/quantized/simd128.rs b/diffusion_rs_common/src/core/quantized/simd128.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/simd128.rs rename to diffusion_rs_common/src/core/quantized/simd128.rs diff --git a/diffuse_rs_common/src/core/quantized/utils.rs b/diffusion_rs_common/src/core/quantized/utils.rs similarity index 100% rename from diffuse_rs_common/src/core/quantized/utils.rs rename to diffusion_rs_common/src/core/quantized/utils.rs diff --git a/diffuse_rs_common/src/core/safetensors.rs b/diffusion_rs_common/src/core/safetensors.rs similarity index 100% rename from diffuse_rs_common/src/core/safetensors.rs rename to diffusion_rs_common/src/core/safetensors.rs diff --git a/diffuse_rs_common/src/core/scalar.rs b/diffusion_rs_common/src/core/scalar.rs similarity index 100% rename from 
diffuse_rs_common/src/core/scalar.rs rename to diffusion_rs_common/src/core/scalar.rs diff --git a/diffuse_rs_common/src/core/shape.rs b/diffusion_rs_common/src/core/shape.rs similarity index 100% rename from diffuse_rs_common/src/core/shape.rs rename to diffusion_rs_common/src/core/shape.rs diff --git a/diffuse_rs_common/src/core/sort.rs b/diffusion_rs_common/src/core/sort.rs similarity index 100% rename from diffuse_rs_common/src/core/sort.rs rename to diffusion_rs_common/src/core/sort.rs diff --git a/diffuse_rs_common/src/core/storage.rs b/diffusion_rs_common/src/core/storage.rs similarity index 100% rename from diffuse_rs_common/src/core/storage.rs rename to diffusion_rs_common/src/core/storage.rs diff --git a/diffuse_rs_common/src/core/streaming.rs b/diffusion_rs_common/src/core/streaming.rs similarity index 100% rename from diffuse_rs_common/src/core/streaming.rs rename to diffusion_rs_common/src/core/streaming.rs diff --git a/diffuse_rs_common/src/core/strided_index.rs b/diffusion_rs_common/src/core/strided_index.rs similarity index 100% rename from diffuse_rs_common/src/core/strided_index.rs rename to diffusion_rs_common/src/core/strided_index.rs diff --git a/diffuse_rs_common/src/core/tensor.rs b/diffusion_rs_common/src/core/tensor.rs similarity index 96% rename from diffuse_rs_common/src/core/tensor.rs rename to diffusion_rs_common/src/core/tensor.rs index ccdd1dd..c2dda1a 100644 --- a/diffuse_rs_common/src/core/tensor.rs +++ b/diffusion_rs_common/src/core/tensor.rs @@ -56,13 +56,13 @@ impl AsRef for Tensor { /// The core struct for manipulating tensors. /// /// ```rust -/// use diffuse_rs_common::core::{Tensor, DType, Device}; +/// use diffusion_rs_common::core::{Tensor, DType, Device}; /// /// let a = Tensor::arange(0f32, 6f32, &Device::Cpu)?.reshape((2, 3))?; /// let b = Tensor::arange(0f32, 12f32, &Device::Cpu)?.reshape((3, 4))?; /// /// let c = a.matmul(&b)?; -/// # Ok::<(), diffuse_rs_common::core::Error>(()) +/// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` /// /// Tensors are reference counted with [`Arc`] so cloning them is cheap. @@ -209,11 +209,11 @@ impl Tensor { /// Creates a new tensor filled with ones. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::ones((2, 3), DType::F32, &Device::Cpu)?; /// let b = Tensor::from_slice(&[1.0f32, 1.0, 1.0, 1.0, 1.0, 1.0], (2, 3), &Device::Cpu)?; /// // a == b - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn ones>(shape: S, dtype: DType, device: &Device) -> Result { Self::ones_impl(shape, dtype, device, false) @@ -222,11 +222,11 @@ impl Tensor { /// Creates a new tensor filled with ones with same shape, dtype, and device as the other tensor. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// let b = a.ones_like()?; /// // b == a + 1 - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn ones_like(&self) -> Result { Tensor::ones(self.shape(), self.dtype(), self.device()) @@ -249,11 +249,11 @@ impl Tensor { /// Creates a new tensor filled with zeros. 
/// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// let b = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 0.0, 0.0, 0.0], (2, 3), &Device::Cpu)?; /// // a == b - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn zeros>(shape: S, dtype: DType, device: &Device) -> Result { Self::zeros_impl(shape, dtype, device, false) @@ -263,11 +263,11 @@ impl Tensor { /// tensor. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// let b = a.zeros_like()?; /// // b is on CPU f32. - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn zeros_like(&self) -> Result { Tensor::zeros(self.shape(), self.dtype(), self.device()) @@ -293,10 +293,10 @@ impl Tensor { /// This returns uninitialized memory. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = unsafe { Tensor::empty((2, 3), DType::F32, &Device::Cpu)? }; /// // a == b - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub unsafe fn empty>(shape: S, dtype: DType, device: &Device) -> Result { Self::empty_impl(shape, dtype, device, false) @@ -309,10 +309,10 @@ impl Tensor { /// This returns uninitialized memory. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// let b = unsafe { a.empty_like()? }; - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub unsafe fn empty_like(&self) -> Result { Tensor::empty(self.shape(), self.dtype(), self.device()) @@ -433,14 +433,14 @@ impl Tensor { /// Returns a new tensor with all the elements having the same specified value. Note that /// the tensor is not contiguous so you would have to call `.contiguous()` on it if needed. ///```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::full(3.5, (2, 4), &Device::Cpu)?; /// /// assert_eq!(a.to_vec2::()?, &[ /// [3.5, 3.5, 3.5, 3.5], /// [3.5, 3.5, 3.5, 3.5], /// ]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) pub fn full>( value: D, shape: S, @@ -451,11 +451,11 @@ impl Tensor { /// Creates a new 1D tensor from an iterator. ///```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::from_iter( [1.0, 2.0, 3.0, 4.0].into_iter(), &Device::Cpu)?; /// /// assert_eq!(a.to_vec1::()?, &[1.0, 2.0, 3.0, 4.0]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn from_iter( iter: impl IntoIterator, @@ -469,11 +469,11 @@ impl Tensor { /// Creates a new 1D tensor with values from the interval `[start, end)` taken with a common /// difference `1` from `start`. 
///```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::arange(2., 5., &Device::Cpu)?; /// /// assert_eq!(a.to_vec1::<f64>()?, &[2., 3., 4.]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn arange<D: crate::core::WithDType>(start: D, end: D, device: &Device) -> Result<Self> { Self::arange_step(start, end, D::one(), device) } @@ -482,11 +482,11 @@ impl Tensor { /// Creates a new 1D tensor with values from the interval `[start, end)` taken with a common /// difference `step` from `start`. ///```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::arange_step(2.0, 4.0, 0.5, &Device::Cpu)?; /// /// assert_eq!(a.to_vec1::<f64>()?, &[2.0, 2.5, 3.0, 3.5]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn arange_step<D: crate::core::WithDType>( start: D, @@ -534,14 +534,14 @@ impl Tensor { /// in this vector must be the same as the number of elements defined by the shape. /// If the device is cpu, no data copy is made. ///```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::from_vec(vec!{1., 2., 3., 4., 5., 6.}, (2, 3), &Device::Cpu)?; /// /// assert_eq!(a.to_vec2::<f64>()?, &[ /// [1., 2., 3.], /// [4., 5., 6.] /// ]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn from_vec<S: Into<Shape>, D: crate::core::WithDType>( data: Vec<D>, @@ -554,7 +554,7 @@ impl Tensor { /// Creates a new tensor initialized with values from the input slice. The number of elements /// in this vector must be the same as the number of elements defined by the shape. ///```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let values = vec![1., 2., 3., 4., 5., 6., 7., 8.]; /// let a = Tensor::from_slice(&values[1..7], (2, 3), &Device::Cpu)?; /// @@ -562,7 +562,7 @@ impl Tensor { /// [2., 3., 4.], /// [5., 6., 7.] /// ]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn from_slice<S: Into<Shape>, D: crate::core::WithDType>( array: &[D], @@ -710,7 +710,7 @@ impl Tensor { /// # Examples /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device, Shape}; + /// use diffusion_rs_common::core::{Tensor, Device, Shape}; /// let x = Tensor::new(&[1f32, 2., 3.], &Device::Cpu)?; /// let y = Tensor::new(&[4f32, 5., 6.], &Device::Cpu)?; /// @@ -726,7 +726,7 @@ impl Tensor { /// /// assert_eq!(grids_ij[0].to_vec2::<f32>()?, &[[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]); /// assert_eq!(grids_ij[1].to_vec2::<f32>()?, &[[4., 5., 6.], [4., 5., 6.], [4., 5., 6.]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` /// /// # Errors @@ -769,11 +769,11 @@ /// be performed.
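Editor's note on the constructor hunks above: the patch only touches the crate name, so the call patterns are unchanged. As a quick smoke test of the renamed paths, a minimal sketch of our own (it assumes the `diffusion_rs_common` API is otherwise identical to the pre-rename crate):

```rust
use diffusion_rs_common::core::{Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    let dev = Device::Cpu;
    // Half-open interval [0, 3) with the default step of 1.
    let a = Tensor::arange(0f32, 3f32, &dev)?;
    // The same values built from a Vec; the element count must match the shape.
    let b = Tensor::from_vec(vec![0f32, 1., 2.], 3usize, &dev)?;
    let diff = (&a - &b)?.abs()?.sum_all()?.to_scalar::<f32>()?;
    assert_eq!(diff, 0.0);
    Ok(())
}
```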
/// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::new(&[[0f32, 1.], [2., 3.]], &Device::Cpu)?; /// let a = a.affine(4., -2.)?; /// assert_eq!(a.to_vec2::<f32>()?, &[[-2.0, 2.0], [6.0, 10.0]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn affine(&self, mul: f64, add: f64) -> Result<Self> { if self.elem_count() == 0 { @@ -846,7 +846,7 @@ impl Tensor { /// Returns a new tensor that is a narrowed version of the input, the dimension `dim` /// ranges from `start` to `start + len`. /// ``` - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::new(&[ /// [0f32, 1., 2.], /// [3. , 4., 5.], @@ -867,7 +867,7 @@ /// [4.], /// [7.] /// ]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn narrow<D: Dim>(&self, dim: D, start: usize, len: usize) -> Result<Self> { let dims = self.dims(); @@ -971,14 +971,14 @@ impl Tensor { /// Elements that are shifted beyond the last position are re-introduced at the first position. /// /// ```rust - /// # use diffuse_rs_common::core::{Tensor, Device}; + /// # use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let tensor = tensor.roll(1, 0)?; /// assert_eq!(tensor.to_vec2::<f32>()?, &[[4., 5.], [0., 1.], [2., 3.]]); /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let tensor = tensor.roll(-1, 0)?; /// assert_eq!(tensor.to_vec2::<f32>()?, &[[2., 3.], [4., 5.], [0., 1.]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn roll<D>(&self, shift: i32, dim: D) -> Result<Self> where @@ -1003,7 +1003,7 @@ impl Tensor { /// that the number of elements for each dimension index in `sum_dims` is 1. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::new(&[[0f32, 1.], [2., 3.]], &Device::Cpu)?; /// let s = a.sum_keepdim(0)?; /// assert_eq!(s.to_vec2::<f32>()?, &[[2., 4.]]); @@ -1011,7 +1011,7 @@ /// assert_eq!(s.to_vec2::<f32>()?, &[[1.], [5.]]); /// let s = a.sum_keepdim((0, 1))?; /// assert_eq!(s.to_vec2::<f32>()?, &[[6.]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn sum_keepdim<D: Dims>(&self, sum_dims: D) -> Result<Self> { self.sum_impl(sum_dims, true) @@ -1031,7 +1031,7 @@ impl Tensor { /// that the number of elements for each dimension index in `mean_dims` is 1. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let a = Tensor::new(&[[0f32, 1.], [2., 3.]], &Device::Cpu)?; /// let s = a.mean_keepdim(0)?; /// assert_eq!(s.to_vec2::<f32>()?, &[[1., 2.]]); @@ -1039,7 +1039,7 @@ /// assert_eq!(s.to_vec2::<f32>()?, &[[0.5], [2.5]]); /// let s = a.mean_keepdim((0, 1))?; /// assert_eq!(s.to_vec2::<f32>()?, &[[1.5]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn mean_keepdim<D: Dims>(&self, mean_dims: D) -> Result<Self> { let mean_dims = mean_dims.to_indexes(self.shape(), "mean-keepdim")?; @@ -1559,12 +1559,12 @@ impl Tensor { /// vocabulary size, and `h` the hidden size.
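The `narrow`, `roll`, and `*_keepdim` hunks are likewise rename-only. A short sketch of how `narrow` and `sum_keepdim` compose under the new crate name (our example, not part of the patch):

```rust
use diffusion_rs_common::core::{Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    let a = Tensor::new(&[[0f32, 1., 2.], [3., 4., 5.]], &Device::Cpu)?;
    // Keep columns 1..3 of both rows.
    let cols = a.narrow(1, 1, 2)?;
    assert_eq!(cols.to_vec2::<f32>()?, &[[1., 2.], [4., 5.]]);
    // Reducing over dim 0 keeps that dim, with size 1.
    let s = cols.sum_keepdim(0)?;
    assert_eq!(s.to_vec2::<f32>()?, &[[5., 7.]]);
    Ok(())
}
```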
/// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let values = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let ids = Tensor::new(&[2u32, 1u32, 2u32], &Device::Cpu)?; /// let emb = values.embedding(&ids)?; /// assert_eq!(emb.to_vec2::<f32>()?, &[[4., 5.], [2., 3.], [4., 5.]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn embedding(&self, ids: &Self) -> Result<Self> { if self.rank() != 2 || ids.rank() != 1 { @@ -1754,11 +1754,11 @@ impl Tensor { /// scalar with zero dimensions. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let tensor = tensor.sum_all()?; /// assert_eq!(tensor.to_scalar::<f32>()?, 15.); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn sum_all(&self) -> Result<Tensor> { let dims: Vec<_> = (0..self.rank()).collect(); @@ -1819,11 +1819,11 @@ impl Tensor { /// Flattens the input tensor by reshaping it into a one dimension tensor. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let tensor = tensor.flatten_all()?; /// assert_eq!(tensor.to_vec1::<f32>()?, &[0., 1., 2., 3., 4., 5.]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn flatten_all(&self) -> Result<Tensor> { self.flatten_(None::<usize>, None::<usize>) @@ -1832,13 +1832,13 @@ impl Tensor { /// Returns the sub-tensor fixing the index at `i` on the first dimension. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let t = tensor.get(0)?; /// assert_eq!(t.to_vec1::<f32>()?, &[0., 1.]); /// let t = tensor.get(1)?; /// assert_eq!(t.to_vec1::<f32>()?, &[2., 3.]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn get(&self, i: usize) -> Result<Tensor> { let dims = self.dims(); @@ -1852,7 +1852,7 @@ impl Tensor { /// Returns the sub-tensor fixing the index at `index` on the dimension `dim`. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let t = tensor.get_on_dim(1, 0)?; /// assert_eq!(t.to_vec1::<f32>()?, &[0., 2., 4.]); @@ -1860,7 +1860,7 @@ impl Tensor { /// assert_eq!(t.to_vec1::<f32>()?, &[1., 3., 5.]); /// let t = tensor.get_on_dim(0, 1)?; /// assert_eq!(t.to_vec1::<f32>()?, &[2., 3.]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn get_on_dim<D: Dim>(&self, dim: D, index: usize) -> Result<Tensor> { let dim = dim.to_index(self.shape(), "get_on_dim")?; @@ -1871,11 +1871,11 @@ /// input are swapped.
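`embedding` above is a plain row lookup: `self` is the `(v, h)` table, `ids` a rank-1 index tensor. A compact sketch (the table values are ours):

```rust
use diffusion_rs_common::core::{Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    let table = Tensor::new(&[[10f32, 11.], [20., 21.], [30., 31.]], &Device::Cpu)?;
    let ids = Tensor::new(&[2u32, 0, 2], &Device::Cpu)?;
    // Output shape is (ids_len, hidden): one table row per id, repeats allowed.
    let emb = table.embedding(&ids)?;
    assert_eq!(emb.to_vec2::<f32>()?, &[[30., 31.], [10., 11.], [30., 31.]]);
    Ok(())
}
```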
/// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(&[[0f32, 1.], [2., 3.], [4., 5.]], &Device::Cpu)?; /// let tensor = tensor.t()?; /// assert_eq!(tensor.to_vec2::<f32>()?, &[[0.0, 2.0, 4.0], [1.0, 3.0, 5.0]]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn t(&self) -> Result<Tensor> { let rank = self.rank(); @@ -1915,12 +1915,12 @@ impl Tensor { /// dims must be a permutation, i.e. include each dimension index exactly once. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::arange(0u32, 120u32, &Device::Cpu)?.reshape((2, 3, 4, 5))?; /// assert_eq!(tensor.dims(), &[2, 3, 4, 5]); /// let tensor = tensor.permute((2, 3, 1, 0))?; /// assert_eq!(tensor.dims(), &[4, 5, 3, 2]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn permute<D: Dims>(&self, dims: D) -> Result<Tensor> { let dims = dims.to_indexes(self.shape(), "permute")?; @@ -2074,12 +2074,12 @@ impl Tensor { /// Casts the input tensor to the target `dtype`. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, Device}; + /// use diffusion_rs_common::core::{Tensor, Device}; /// let tensor = Tensor::new(3.14159265358979f64, &Device::Cpu)?; /// assert_eq!(tensor.to_scalar::<f64>()?, 3.14159265358979); - /// let tensor = tensor.to_dtype(diffuse_rs_common::core::DType::F32)?; + /// let tensor = tensor.to_dtype(diffusion_rs_common::core::DType::F32)?; /// assert_eq!(tensor.to_scalar::<f32>()?, 3.1415927); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn to_dtype(&self, dtype: DType) -> Result<Self> { if self.dtype() == dtype { @@ -2137,7 +2137,7 @@ impl Tensor { /// as to match the number of elements in the tensor. /// /// ```rust - /// # use diffuse_rs_common::core::{Tensor, DType, Device, D}; + /// # use diffusion_rs_common::core::{Tensor, DType, Device, D}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// /// let c = a.reshape((1, 6))?; /// assert_eq!(c.shape().dims(), &[1, 6]); /// @@ -2149,7 +2149,7 @@ /// let c = a.reshape((2, (), 1))?; /// assert_eq!(c.shape().dims(), &[2, 3, 1]); /// - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn reshape<S: crate::core::shape::ShapeWithOneHole>(&self, s: S) -> Result<Tensor> { let shape = s.into_shape(self.elem_count())?; @@ -2184,7 +2184,7 @@ impl Tensor { /// Creates a new tensor with the specified dimension removed if its size was one. /// /// ```rust - /// # use diffuse_rs_common::core::{Tensor, DType, Device, D}; + /// # use diffusion_rs_common::core::{Tensor, DType, Device, D}; /// let a = Tensor::zeros((2, 3, 1), DType::F32, &Device::Cpu)?; /// /// let c = a.squeeze(2)?; /// assert_eq!(c.shape().dims(), &[2, 3]); /// /// let c = a.squeeze(D::Minus1)?; /// assert_eq!(c.shape().dims(), &[2, 3]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn squeeze<D: Dim>(&self, dim: D) -> Result<Self> { // The PyTorch semantics are to return the same tensor if the target dimension @@ -2222,7 +2222,7 @@ /// Creates a new tensor with a dimension of size one inserted at the specified position.
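The reshape hunk above documents the `()` placeholder: exactly one dimension may be left for the library to infer. Before the `unsqueeze` hunk continues below, a sketch of ours chaining it with `permute` and `to_dtype`:

```rust
use diffusion_rs_common::core::{DType, Device, Tensor};

fn main() -> diffusion_rs_common::core::Result<()> {
    let t = Tensor::arange(0u32, 24u32, &Device::Cpu)?;
    // `()` marks the inferred dimension: 24 / (2 * 4) = 3.
    let t = t.reshape((2, (), 4))?;
    assert_eq!(t.dims(), &[2, 3, 4]);
    // A permutation must mention every dimension index exactly once.
    let t = t.permute((2, 0, 1))?;
    assert_eq!(t.dims(), &[4, 2, 3]);
    // Casting changes the element type, never the shape.
    let t = t.to_dtype(DType::F32)?;
    assert_eq!(t.dims(), &[4, 2, 3]);
    Ok(())
}
```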
/// /// ```rust - /// # use diffuse_rs_common::core::{Tensor, DType, Device, D}; + /// # use diffusion_rs_common::core::{Tensor, DType, Device, D}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// /// let c = a.unsqueeze(0)?; /// assert_eq!(c.shape().dims(), &[1, 2, 3]); /// /// let c = a.unsqueeze(D::Minus1)?; /// assert_eq!(c.shape().dims(), &[2, 3, 1]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn unsqueeze<D: Dim>(&self, dim: D) -> Result<Self> { let mut dims = self.dims().to_vec(); @@ -2259,7 +2259,7 @@ /// All tensors must have the same rank, and the output has one additional rank /// /// ```rust - /// # use diffuse_rs_common::core::{Tensor, DType, Device}; + /// # use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// let b = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// @@ -2268,7 +2268,7 @@ impl Tensor { /// /// let c = Tensor::stack(&[&a, &b], 2)?; /// assert_eq!(c.shape().dims(), &[2, 3, 2]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn stack<A: AsRef<Tensor>, D: Dim>(args: &[A], dim: D) -> Result<Self> { if args.is_empty() { diff --git a/diffuse_rs_common/src/core/tensor_cat.rs b/diffusion_rs_common/src/core/tensor_cat.rs similarity index 98% rename from diffuse_rs_common/src/core/tensor_cat.rs rename to diffusion_rs_common/src/core/tensor_cat.rs index 7f99c2c..dee402d 100644 --- a/diffuse_rs_common/src/core/tensor_cat.rs +++ b/diffusion_rs_common/src/core/tensor_cat.rs @@ -7,7 +7,7 @@ impl Tensor { /// the same rank /// /// ```rust - /// # use diffuse_rs_common::core::{Tensor, DType, Device}; + /// # use diffusion_rs_common::core::{Tensor, DType, Device}; /// let a = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// let b = Tensor::zeros((2, 3), DType::F32, &Device::Cpu)?; /// @@ -16,7 +16,7 @@ impl Tensor { /// /// let c = Tensor::cat(&[&a, &b], 1)?; /// assert_eq!(c.shape().dims(), &[2, 6]); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn cat<A: AsRef<Tensor>, D: Dim>(args: &[A], dim: D) -> Result<Self> { if args.is_empty() { diff --git a/diffuse_rs_common/src/core/tensor_indexing.rs b/diffusion_rs_common/src/core/tensor_indexing.rs similarity index 99% rename from diffuse_rs_common/src/core/tensor_indexing.rs rename to diffusion_rs_common/src/core/tensor_indexing.rs index a5aebec..58f37a5 100644 --- a/diffuse_rs_common/src/core/tensor_indexing.rs +++ b/diffusion_rs_common/src/core/tensor_indexing.rs @@ -72,7 +72,7 @@ impl Tensor { /// /// # Example /// ```rust - /// use diffuse_rs_common::core::{Device, Tensor}; + /// use diffusion_rs_common::core::{Device, Tensor}; /// /// let dev = Device::Cpu; /// let tensor = Tensor::arange(0u32, 4 * 5, &dev)?.reshape((4, 5))?; @@ -87,7 +87,7 @@ /// [15, 16, 17, 18, 19] /// ] /// ); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn slice_assign(&self, ranges: &[&dyn RangeBound], src: &Tensor) -> Result<Self> { let src_dims = src.dims(); diff --git a/diffuse_rs_common/src/core/test_utils.rs b/diffusion_rs_common/src/core/test_utils.rs similarity index 100% rename from diffuse_rs_common/src/core/test_utils.rs rename to diffusion_rs_common/src/core/test_utils.rs diff --git a/diffuse_rs_common/src/core/tests/conv_tests.rs b/diffusion_rs_common/src/core/tests/conv_tests.rs similarity index 99%
rename from diffuse_rs_common/src/core/tests/conv_tests.rs rename to diffusion_rs_common/src/core/tests/conv_tests.rs index 3b5413f..fe216e2 100644 --- a/diffuse_rs_common/src/core/tests/conv_tests.rs +++ b/diffusion_rs_common/src/core/tests/conv_tests.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use diffuse_rs_common::core::{test_device, test_utils, Device, IndexOp, Tensor}; +use diffusion_rs_common::core::{test_device, test_utils, Device, IndexOp, Tensor}; /* This test is based on the following script. import torch @@ -385,7 +385,7 @@ print(w.grad[0]) */ fn conv2d_grad(dev: &Device) -> Result<()> { // conv-transposes are not implemented for metal - use diffuse_rs_common::core::Var; + use diffusion_rs_common::core::Var; let t = Var::from_slice( &[ 0.4056f32, -0.8689, -0.0773, -1.5630, -2.8012, -1.5059, 0.3972, 1.0852, 0.4997, 3.0616, diff --git a/diffuse_rs_common/src/core/tests/custom_op_tests.rs b/diffusion_rs_common/src/core/tests/custom_op_tests.rs similarity index 83% rename from diffuse_rs_common/src/core/tests/custom_op_tests.rs rename to diffusion_rs_common/src/core/tests/custom_op_tests.rs index 5712cd0..6f1ef63 100644 --- a/diffuse_rs_common/src/core/tests/custom_op_tests.rs +++ b/diffusion_rs_common/src/core/tests/custom_op_tests.rs @@ -1,7 +1,7 @@ -use diffuse_rs_common::core::backend::BackendStorage; -use diffuse_rs_common::core::cpu_backend; -use diffuse_rs_common::core::test_utils::to_vec1_round; -use diffuse_rs_common::core::{CpuStorage, CustomOp1, DType, Device, Error, Layout, Result, Shape, Tensor}; +use diffusion_rs_common::core::backend::BackendStorage; +use diffusion_rs_common::core::cpu_backend; +use diffusion_rs_common::core::test_utils::to_vec1_round; +use diffusion_rs_common::core::{CpuStorage, CustomOp1, DType, Device, Error, Layout, Result, Shape, Tensor}; fn fwd(v: T, alpha: f64) -> T { if v.is_sign_positive() { @@ -22,7 +22,7 @@ impl CustomOp1 for Elu { } fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)> { - let storage = diffuse_rs_common::core::map_dtype!( + let storage = diffusion_rs_common::core::map_dtype!( "elu", s, |s| cpu_backend::unary_map(s, l, |v| fwd(v, self.alpha)), @@ -65,7 +65,7 @@ impl CustomOp1 for EluBackward { } fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)> { - let storage = diffuse_rs_common::core::map_dtype!( + let storage = diffusion_rs_common::core::map_dtype!( "elu-bwd", s, |s| cpu_backend::unary_map(s, l, |v| bwd(v, self.alpha)), @@ -102,7 +102,7 @@ impl CustomOp1 for EluWithBackward { #[test] fn custom_op1_with_backward() -> Result<()> { let cpu = &Device::Cpu; - let t = diffuse_rs_common::core::Var::new(&[-2f32, 0f32, 2f32], cpu)?; + let t = diffusion_rs_common::core::Var::new(&[-2f32, 0f32, 2f32], cpu)?; let elu_t = t.apply_op1(EluWithBackward::new(2.))?; assert_eq!(to_vec1_round(&elu_t, 4)?, &[-1.7293, 0.0, 2.0]); @@ -113,7 +113,7 @@ fn custom_op1_with_backward() -> Result<()> { Ok(()) } -impl diffuse_rs_common::core::InplaceOp1 for Elu { +impl diffusion_rs_common::core::InplaceOp1 for Elu { fn name(&self) -> &'static str { "elu" } @@ -125,7 +125,7 @@ impl diffuse_rs_common::core::InplaceOp1 for Elu { CpuStorage::F16(s) => s.iter_mut().for_each(|v| *v = fwd(*v, alpha)), CpuStorage::F32(s) => s.iter_mut().for_each(|v| *v = fwd(*v, alpha)), CpuStorage::F64(s) => s.iter_mut().for_each(|v| *v = fwd(*v, alpha)), - _ => diffuse_rs_common::bail!("unsupported dtype for inplace elu"), + _ => diffusion_rs_common::bail!("unsupported dtype for inplace elu"), } Ok(()) } @@ -160,14 +160,14 @@ 
fn ug_op() -> Result<()> { let opts: ug::lower_op::Opts = Default::default(); kernel.lower(&opts.with_global(0, 12))? }; - let device = if diffuse_rs_common::core::utils::cuda_is_available() { + let device = if diffusion_rs_common::core::utils::cuda_is_available() { Device::new_cuda(0)? - } else if diffuse_rs_common::core::utils::metal_is_available() { + } else if diffusion_rs_common::core::utils::metal_is_available() { Device::new_metal(0)? } else { - diffuse_rs_common::bail!("metal/cuda is mandatory for this test") + diffusion_rs_common::bail!("metal/cuda is mandatory for this test") }; - let op = diffuse_rs_common::core::UgIOp1::new("test", kernel, &device)?; + let op = diffusion_rs_common::core::UgIOp1::new("test", kernel, &device)?; let t = Tensor::arange(0u32, 12u32, &device)?.to_dtype(DType::F32)?; t.inplace_op1(&op)?; assert_eq!( diff --git a/diffuse_rs_common/src/core/tests/display_tests.rs b/diffusion_rs_common/src/core/tests/display_tests.rs similarity index 97% rename from diffuse_rs_common/src/core/tests/display_tests.rs rename to diffusion_rs_common/src/core/tests/display_tests.rs index 8dae43c..93319a3 100644 --- a/diffuse_rs_common/src/core/tests/display_tests.rs +++ b/diffusion_rs_common/src/core/tests/display_tests.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use diffuse_rs_common::core::{DType, Device::Cpu, Tensor}; +use diffusion_rs_common::core::{DType, Device::Cpu, Tensor}; #[test] fn display_scalar() -> Result<()> { diff --git a/diffuse_rs_common/src/core/tests/fortran_tensor_3d.pth b/diffusion_rs_common/src/core/tests/fortran_tensor_3d.pth similarity index 100% rename from diffuse_rs_common/src/core/tests/fortran_tensor_3d.pth rename to diffusion_rs_common/src/core/tests/fortran_tensor_3d.pth diff --git a/diffuse_rs_common/src/core/tests/grad_tests.rs b/diffusion_rs_common/src/core/tests/grad_tests.rs similarity index 99% rename from diffuse_rs_common/src/core/tests/grad_tests.rs rename to diffusion_rs_common/src/core/tests/grad_tests.rs index b06c001..644f193 100644 --- a/diffuse_rs_common/src/core/tests/grad_tests.rs +++ b/diffusion_rs_common/src/core/tests/grad_tests.rs @@ -1,6 +1,6 @@ #![allow(clippy::approx_constant)] use anyhow::{Context, Result}; -use diffuse_rs_common::core::{test_device, test_utils, Device, Shape, Tensor, Var}; +use diffusion_rs_common::core::{test_device, test_utils, Device, Shape, Tensor, Var}; fn simple_grad(device: &Device) -> Result<()> { let x = Var::new(&[3f32, 1., 4.], device)?; diff --git a/diffuse_rs_common/src/core/tests/indexing_tests.rs b/diffusion_rs_common/src/core/tests/indexing_tests.rs similarity index 99% rename from diffuse_rs_common/src/core/tests/indexing_tests.rs rename to diffusion_rs_common/src/core/tests/indexing_tests.rs index 99fcc63..c7d9f02 100644 --- a/diffuse_rs_common/src/core/tests/indexing_tests.rs +++ b/diffusion_rs_common/src/core/tests/indexing_tests.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use diffuse_rs_common::core::{Device, IndexOp, Tensor}; +use diffusion_rs_common::core::{Device, IndexOp, Tensor}; #[test] fn integer_index() -> Result<()> { diff --git a/diffuse_rs_common/src/core/tests/layout_tests.rs b/diffusion_rs_common/src/core/tests/layout_tests.rs similarity index 100% rename from diffuse_rs_common/src/core/tests/layout_tests.rs rename to diffusion_rs_common/src/core/tests/layout_tests.rs diff --git a/diffuse_rs_common/src/core/tests/matmul_tests.rs b/diffusion_rs_common/src/core/tests/matmul_tests.rs similarity index 98% rename from diffuse_rs_common/src/core/tests/matmul_tests.rs rename to 
diffusion_rs_common/src/core/tests/matmul_tests.rs index 07be82e..5ebf4f8 100644 --- a/diffuse_rs_common/src/core/tests/matmul_tests.rs +++ b/diffusion_rs_common/src/core/tests/matmul_tests.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{test_device, DType, Device, IndexOp, Result, Tensor}; +use diffusion_rs_common::core::{test_device, DType, Device, IndexOp, Result, Tensor}; fn matmul(device: &Device) -> Result<()> { let data = vec![1.0f32, 2.0, 3.0, 4.0]; diff --git a/diffuse_rs_common/src/core/tests/npy.py b/diffusion_rs_common/src/core/tests/npy.py similarity index 100% rename from diffuse_rs_common/src/core/tests/npy.py rename to diffusion_rs_common/src/core/tests/npy.py diff --git a/diffuse_rs_common/src/core/tests/pool_tests.rs b/diffusion_rs_common/src/core/tests/pool_tests.rs similarity index 97% rename from diffuse_rs_common/src/core/tests/pool_tests.rs rename to diffusion_rs_common/src/core/tests/pool_tests.rs index e706d16..1bb3804 100644 --- a/diffuse_rs_common/src/core/tests/pool_tests.rs +++ b/diffusion_rs_common/src/core/tests/pool_tests.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{test_device, test_utils, Device, IndexOp, Result, Tensor}; +use diffusion_rs_common::core::{test_device, test_utils, Device, IndexOp, Result, Tensor}; // https://github.com/huggingface/candle/issues/364 fn avg_pool2d(dev: &Device) -> Result<()> { diff --git a/diffuse_rs_common/src/core/tests/pth.py b/diffusion_rs_common/src/core/tests/pth.py similarity index 100% rename from diffuse_rs_common/src/core/tests/pth.py rename to diffusion_rs_common/src/core/tests/pth.py diff --git a/diffuse_rs_common/src/core/tests/pth_tests.rs b/diffusion_rs_common/src/core/tests/pth_tests.rs similarity index 66% rename from diffuse_rs_common/src/core/tests/pth_tests.rs rename to diffusion_rs_common/src/core/tests/pth_tests.rs index 0edaac1..e12f63d 100644 --- a/diffuse_rs_common/src/core/tests/pth_tests.rs +++ b/diffusion_rs_common/src/core/tests/pth_tests.rs @@ -1,14 +1,14 @@ /// Regression test for pth files not loading on Windows. 
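The `custom_op_tests.rs` hunk a little further up shows the shape of a user-defined op: `CustomOp1` needs a `name` plus a `cpu_fwd` that maps a `CpuStorage` and `Layout` to a new storage and shape. A stripped-down sketch along the same lines; the `Square` op is hypothetical, and it only handles `F32` where the test's `Elu` dispatches over dtypes with `map_dtype!`:

```rust
use diffusion_rs_common::core::{cpu_backend, CpuStorage, CustomOp1, Device, Layout, Result, Shape, Tensor};

struct Square;

impl CustomOp1 for Square {
    fn name(&self) -> &'static str {
        "square"
    }

    fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)> {
        let storage = match s {
            // unary_map walks the layout and applies the closure element-wise.
            CpuStorage::F32(vs) => CpuStorage::F32(cpu_backend::unary_map(vs, l, |v| v * v)),
            _ => diffusion_rs_common::bail!("unsupported dtype for square"),
        };
        Ok((storage, l.shape().clone()))
    }
}

fn main() -> Result<()> {
    let t = Tensor::new(&[-2f32, 0., 3.], &Device::Cpu)?;
    let sq = t.apply_op1(Square)?;
    assert_eq!(sq.to_vec1::<f32>()?, &[4., 0., 9.]);
    Ok(())
}
```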
#[test] fn test_pth() { - let tensors = diffuse_rs_common::core::pickle::PthTensors::new("tests/test.pt", None).unwrap(); + let tensors = diffusion_rs_common::core::pickle::PthTensors::new("tests/test.pt", None).unwrap(); tensors.get("test").unwrap().unwrap(); } #[test] fn test_pth_with_key() { let tensors = - diffuse_rs_common::core::pickle::PthTensors::new("tests/test_with_key.pt", Some("model_state_dict")) + diffusion_rs_common::core::pickle::PthTensors::new("tests/test_with_key.pt", Some("model_state_dict")) .unwrap(); tensors.get("test").unwrap().unwrap(); } @@ -16,7 +16,7 @@ fn test_pth_with_key() { #[test] fn test_pth_fortran_congiguous() { let tensors = - diffuse_rs_common::core::pickle::PthTensors::new("tests/fortran_tensor_3d.pth", None).unwrap(); + diffusion_rs_common::core::pickle::PthTensors::new("tests/fortran_tensor_3d.pth", None).unwrap(); let tensor = tensors.get("tensor_fortran").unwrap().unwrap(); assert_eq!(tensor.dims3().unwrap(), (2, 3, 4)); diff --git a/diffuse_rs_common/src/core/tests/quantized_tests.rs b/diffusion_rs_common/src/core/tests/quantized_tests.rs similarity index 99% rename from diffuse_rs_common/src/core/tests/quantized_tests.rs rename to diffusion_rs_common/src/core/tests/quantized_tests.rs index 80af749..52e3053 100644 --- a/diffuse_rs_common/src/core/tests/quantized_tests.rs +++ b/diffusion_rs_common/src/core/tests/quantized_tests.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{ +use diffusion_rs_common::core::{ bail, quantized::{self, GgmlDType}, test_device, diff --git a/diffuse_rs_common/src/core/tests/serialization_tests.rs b/diffusion_rs_common/src/core/tests/serialization_tests.rs similarity index 76% rename from diffuse_rs_common/src/core/tests/serialization_tests.rs rename to diffusion_rs_common/src/core/tests/serialization_tests.rs index e62c5f3..7ecea4a 100644 --- a/diffuse_rs_common/src/core/tests/serialization_tests.rs +++ b/diffusion_rs_common/src/core/tests/serialization_tests.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{DType, Result, Tensor}; +use diffusion_rs_common::core::{DType, Result, Tensor}; struct TmpFile(std::path::PathBuf); @@ -51,20 +51,20 @@ fn npz() -> Result<()> { #[test] fn safetensors() -> Result<()> { - use diffuse_rs_common::core::safetensors::Load; + use diffusion_rs_common::core::safetensors::Load; let tmp_file = TmpFile::create("st"); - let t = Tensor::arange(0f32, 24f32, &diffuse_rs_common::core::Device::Cpu)?; + let t = Tensor::arange(0f32, 24f32, &diffusion_rs_common::core::Device::Cpu)?; t.save_safetensors("t", &tmp_file)?; // Load from file. - let st = diffuse_rs_common::core::safetensors::load(&tmp_file, &diffuse_rs_common::core::Device::Cpu)?; + let st = diffusion_rs_common::core::safetensors::load(&tmp_file, &diffusion_rs_common::core::Device::Cpu)?; let t2 = st.get("t").unwrap(); let diff = (&t - t2)?.abs()?.sum_all()?.to_vec0::()?; assert_eq!(diff, 0f32); // Load from bytes. 
let bytes = std::fs::read(tmp_file)?; - let st = diffuse_rs_common::core::safetensors::SliceSafetensors::new(&bytes)?; - let t2 = st.get("t").unwrap().load(&diffuse_rs_common::core::Device::Cpu); + let st = diffusion_rs_common::core::safetensors::SliceSafetensors::new(&bytes)?; + let t2 = st.get("t").unwrap().load(&diffusion_rs_common::core::Device::Cpu); let diff = (&t - t2)?.abs()?.sum_all()?.to_vec0::()?; assert_eq!(diff, 0f32); Ok(()) diff --git a/diffuse_rs_common/src/core/tests/tensor_tests.rs b/diffusion_rs_common/src/core/tests/tensor_tests.rs similarity index 99% rename from diffuse_rs_common/src/core/tests/tensor_tests.rs rename to diffusion_rs_common/src/core/tests/tensor_tests.rs index a988bf9..8d55b24 100644 --- a/diffuse_rs_common/src/core/tests/tensor_tests.rs +++ b/diffusion_rs_common/src/core/tests/tensor_tests.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{test_device, test_utils, DType, Device, IndexOp, Result, Tensor, D}; +use diffusion_rs_common::core::{test_device, test_utils, DType, Device, IndexOp, Result, Tensor, D}; fn zeros(device: &Device) -> Result<()> { let tensor = Tensor::zeros((5, 2), DType::F32, device)?; @@ -1545,7 +1545,7 @@ test_device!(zero_dim, zero_dim_cpu, zero_dim_gpu, zero_dim_metal); fn randn_hasneg() -> Result<()> { let t = Tensor::randn(0f32, 1f32, 200, &Device::Cpu)?.to_vec1::()?; if t.iter().all(|&v| v >= 0.) { - diffuse_rs_common::bail!("all values in tensors are non-negative") + diffusion_rs_common::bail!("all values in tensors are non-negative") } Ok(()) } diff --git a/diffuse_rs_common/src/core/tests/test.npy b/diffusion_rs_common/src/core/tests/test.npy similarity index 100% rename from diffuse_rs_common/src/core/tests/test.npy rename to diffusion_rs_common/src/core/tests/test.npy diff --git a/diffuse_rs_common/src/core/tests/test.npz b/diffusion_rs_common/src/core/tests/test.npz similarity index 100% rename from diffuse_rs_common/src/core/tests/test.npz rename to diffusion_rs_common/src/core/tests/test.npz diff --git a/diffuse_rs_common/src/core/tests/test.pt b/diffusion_rs_common/src/core/tests/test.pt similarity index 100% rename from diffuse_rs_common/src/core/tests/test.pt rename to diffusion_rs_common/src/core/tests/test.pt diff --git a/diffuse_rs_common/src/core/tests/test_with_key.pt b/diffusion_rs_common/src/core/tests/test_with_key.pt similarity index 100% rename from diffuse_rs_common/src/core/tests/test_with_key.pt rename to diffusion_rs_common/src/core/tests/test_with_key.pt diff --git a/diffuse_rs_common/src/core/utils.rs b/diffusion_rs_common/src/core/utils.rs similarity index 100% rename from diffuse_rs_common/src/core/utils.rs rename to diffusion_rs_common/src/core/utils.rs diff --git a/diffuse_rs_common/src/core/variable.rs b/diffusion_rs_common/src/core/variable.rs similarity index 100% rename from diffuse_rs_common/src/core/variable.rs rename to diffusion_rs_common/src/core/variable.rs diff --git a/diffuse_rs_common/src/cuda_kernels/LICENSE b/diffusion_rs_common/src/cuda_kernels/LICENSE similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/LICENSE rename to diffusion_rs_common/src/cuda_kernels/LICENSE diff --git a/diffuse_rs_common/src/cuda_kernels/affine.cu b/diffusion_rs_common/src/cuda_kernels/affine.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/affine.cu rename to diffusion_rs_common/src/cuda_kernels/affine.cu diff --git a/diffuse_rs_common/src/cuda_kernels/binary.cu b/diffusion_rs_common/src/cuda_kernels/binary.cu similarity index 100% rename from 
diffuse_rs_common/src/cuda_kernels/binary.cu rename to diffusion_rs_common/src/cuda_kernels/binary.cu diff --git a/diffuse_rs_common/src/cuda_kernels/binary_op_macros.cuh b/diffusion_rs_common/src/cuda_kernels/binary_op_macros.cuh similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/binary_op_macros.cuh rename to diffusion_rs_common/src/cuda_kernels/binary_op_macros.cuh diff --git a/diffuse_rs_common/src/cuda_kernels/cast.cu b/diffusion_rs_common/src/cuda_kernels/cast.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/cast.cu rename to diffusion_rs_common/src/cuda_kernels/cast.cu diff --git a/diffuse_rs_common/src/cuda_kernels/compatibility.cuh b/diffusion_rs_common/src/cuda_kernels/compatibility.cuh similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/compatibility.cuh rename to diffusion_rs_common/src/cuda_kernels/compatibility.cuh diff --git a/diffuse_rs_common/src/cuda_kernels/conv.cu b/diffusion_rs_common/src/cuda_kernels/conv.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/conv.cu rename to diffusion_rs_common/src/cuda_kernels/conv.cu diff --git a/diffuse_rs_common/src/cuda_kernels/cuda_utils.cuh b/diffusion_rs_common/src/cuda_kernels/cuda_utils.cuh similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/cuda_utils.cuh rename to diffusion_rs_common/src/cuda_kernels/cuda_utils.cuh diff --git a/diffuse_rs_common/src/cuda_kernels/fill.cu b/diffusion_rs_common/src/cuda_kernels/fill.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/fill.cu rename to diffusion_rs_common/src/cuda_kernels/fill.cu diff --git a/diffuse_rs_common/src/cuda_kernels/fused_rms_norm.cu b/diffusion_rs_common/src/cuda_kernels/fused_rms_norm.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/fused_rms_norm.cu rename to diffusion_rs_common/src/cuda_kernels/fused_rms_norm.cu diff --git a/diffuse_rs_common/src/cuda_kernels/fused_rope.cu b/diffusion_rs_common/src/cuda_kernels/fused_rope.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/fused_rope.cu rename to diffusion_rs_common/src/cuda_kernels/fused_rope.cu diff --git a/diffuse_rs_common/src/cuda_kernels/indexing.cu b/diffusion_rs_common/src/cuda_kernels/indexing.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/indexing.cu rename to diffusion_rs_common/src/cuda_kernels/indexing.cu diff --git a/diffuse_rs_common/src/cuda_kernels/kvconcat.cu b/diffusion_rs_common/src/cuda_kernels/kvconcat.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/kvconcat.cu rename to diffusion_rs_common/src/cuda_kernels/kvconcat.cu diff --git a/diffuse_rs_common/src/cuda_kernels/mod.rs b/diffusion_rs_common/src/cuda_kernels/mod.rs similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/mod.rs rename to diffusion_rs_common/src/cuda_kernels/mod.rs diff --git a/diffuse_rs_common/src/cuda_kernels/quantized.cu b/diffusion_rs_common/src/cuda_kernels/quantized.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/quantized.cu rename to diffusion_rs_common/src/cuda_kernels/quantized.cu diff --git a/diffuse_rs_common/src/cuda_kernels/reduce.cu b/diffusion_rs_common/src/cuda_kernels/reduce.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/reduce.cu rename to diffusion_rs_common/src/cuda_kernels/reduce.cu diff --git a/diffuse_rs_common/src/cuda_kernels/sort.cu b/diffusion_rs_common/src/cuda_kernels/sort.cu similarity index 100% rename from 
diffuse_rs_common/src/cuda_kernels/sort.cu rename to diffusion_rs_common/src/cuda_kernels/sort.cu diff --git a/diffuse_rs_common/src/cuda_kernels/ternary.cu b/diffusion_rs_common/src/cuda_kernels/ternary.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/ternary.cu rename to diffusion_rs_common/src/cuda_kernels/ternary.cu diff --git a/diffuse_rs_common/src/cuda_kernels/unary.cu b/diffusion_rs_common/src/cuda_kernels/unary.cu similarity index 100% rename from diffuse_rs_common/src/cuda_kernels/unary.cu rename to diffusion_rs_common/src/cuda_kernels/unary.cu diff --git a/diffuse_rs_common/src/lib.rs b/diffusion_rs_common/src/lib.rs similarity index 100% rename from diffuse_rs_common/src/lib.rs rename to diffusion_rs_common/src/lib.rs diff --git a/diffuse_rs_common/src/metal_kernels/LICENSE b/diffusion_rs_common/src/metal_kernels/LICENSE similarity index 100% rename from diffuse_rs_common/src/metal_kernels/LICENSE rename to diffusion_rs_common/src/metal_kernels/LICENSE diff --git a/diffuse_rs_common/src/metal_kernels/affine.metal b/diffusion_rs_common/src/metal_kernels/affine.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/affine.metal rename to diffusion_rs_common/src/metal_kernels/affine.metal diff --git a/diffuse_rs_common/src/metal_kernels/binary.metal b/diffusion_rs_common/src/metal_kernels/binary.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/binary.metal rename to diffusion_rs_common/src/metal_kernels/binary.metal diff --git a/diffuse_rs_common/src/metal_kernels/cast.metal b/diffusion_rs_common/src/metal_kernels/cast.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/cast.metal rename to diffusion_rs_common/src/metal_kernels/cast.metal diff --git a/diffuse_rs_common/src/metal_kernels/conv.metal b/diffusion_rs_common/src/metal_kernels/conv.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/conv.metal rename to diffusion_rs_common/src/metal_kernels/conv.metal diff --git a/diffuse_rs_common/src/metal_kernels/fill.metal b/diffusion_rs_common/src/metal_kernels/fill.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/fill.metal rename to diffusion_rs_common/src/metal_kernels/fill.metal diff --git a/diffuse_rs_common/src/metal_kernels/indexing.metal b/diffusion_rs_common/src/metal_kernels/indexing.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/indexing.metal rename to diffusion_rs_common/src/metal_kernels/indexing.metal diff --git a/diffuse_rs_common/src/metal_kernels/libMetalFlashAttention.metallib b/diffusion_rs_common/src/metal_kernels/libMetalFlashAttention.metallib similarity index 100% rename from diffuse_rs_common/src/metal_kernels/libMetalFlashAttention.metallib rename to diffusion_rs_common/src/metal_kernels/libMetalFlashAttention.metallib diff --git a/diffuse_rs_common/src/metal_kernels/mlx_gemm.metal b/diffusion_rs_common/src/metal_kernels/mlx_gemm.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/mlx_gemm.metal rename to diffusion_rs_common/src/metal_kernels/mlx_gemm.metal diff --git a/diffuse_rs_common/src/metal_kernels/mod.rs b/diffusion_rs_common/src/metal_kernels/mod.rs similarity index 100% rename from diffuse_rs_common/src/metal_kernels/mod.rs rename to diffusion_rs_common/src/metal_kernels/mod.rs diff --git a/diffuse_rs_common/src/metal_kernels/quantized.metal b/diffusion_rs_common/src/metal_kernels/quantized.metal similarity index 100% rename from 
diffuse_rs_common/src/metal_kernels/quantized.metal rename to diffusion_rs_common/src/metal_kernels/quantized.metal diff --git a/diffuse_rs_common/src/metal_kernels/random.metal b/diffusion_rs_common/src/metal_kernels/random.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/random.metal rename to diffusion_rs_common/src/metal_kernels/random.metal diff --git a/diffuse_rs_common/src/metal_kernels/reduce.metal b/diffusion_rs_common/src/metal_kernels/reduce.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/reduce.metal rename to diffusion_rs_common/src/metal_kernels/reduce.metal diff --git a/diffuse_rs_common/src/metal_kernels/scaled_dot_product_attention.metal b/diffusion_rs_common/src/metal_kernels/scaled_dot_product_attention.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/scaled_dot_product_attention.metal rename to diffusion_rs_common/src/metal_kernels/scaled_dot_product_attention.metal diff --git a/diffuse_rs_common/src/metal_kernels/sort.metal b/diffusion_rs_common/src/metal_kernels/sort.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/sort.metal rename to diffusion_rs_common/src/metal_kernels/sort.metal diff --git a/diffuse_rs_common/src/metal_kernels/ternary.metal b/diffusion_rs_common/src/metal_kernels/ternary.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/ternary.metal rename to diffusion_rs_common/src/metal_kernels/ternary.metal diff --git a/diffuse_rs_common/src/metal_kernels/tests.rs b/diffusion_rs_common/src/metal_kernels/tests.rs similarity index 100% rename from diffuse_rs_common/src/metal_kernels/tests.rs rename to diffusion_rs_common/src/metal_kernels/tests.rs diff --git a/diffuse_rs_common/src/metal_kernels/unary.metal b/diffusion_rs_common/src/metal_kernels/unary.metal similarity index 100% rename from diffuse_rs_common/src/metal_kernels/unary.metal rename to diffusion_rs_common/src/metal_kernels/unary.metal diff --git a/diffuse_rs_common/src/metal_kernels/utils.rs b/diffusion_rs_common/src/metal_kernels/utils.rs similarity index 100% rename from diffuse_rs_common/src/metal_kernels/utils.rs rename to diffusion_rs_common/src/metal_kernels/utils.rs diff --git a/diffuse_rs_common/src/model_source.rs b/diffusion_rs_common/src/model_source.rs similarity index 99% rename from diffuse_rs_common/src/model_source.rs rename to diffusion_rs_common/src/model_source.rs index 7fc777c..8fb2835 100644 --- a/diffuse_rs_common/src/model_source.rs +++ b/diffusion_rs_common/src/model_source.rs @@ -55,7 +55,7 @@ impl ModelSource { /// with the same [base model](https://huggingface.co/black-forest-labs/FLUX.1-dev) as the original model ID. 
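The `model_source.rs` hunk only renames the crate in the `override_transformer_model_id` doc example, which the patch quotes just below. Spelled out as a standalone sketch; the `anyhow` wrapper and the builder-style return types are our assumptions, since the patch only shows that the call is fallible:

```rust
use diffusion_rs_common::ModelSource;

fn main() -> anyhow::Result<()> {
    // Pull FLUX.1-dev, but take the transformer weights from a pre-quantized repo
    // that shares the same base model.
    let source = ModelSource::from_model_id("black-forest-labs/FLUX.1-dev")
        .override_transformer_model_id("sayakpaul/flux.1-dev-nf4-with-bnb-integration")?;
    let _ = source;
    Ok(())
}
```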
/// /// ```rust - /// use diffuse_rs_common::ModelSource; + /// use diffusion_rs_common::ModelSource; /// /// let _ = ModelSource::from_model_id("black-forest-labs/FLUX.1-dev") /// .override_transformer_model_id("sayakpaul/flux.1-dev-nf4-with-bnb-integration")?; diff --git a/diffuse_rs_common/src/nn/LICENSE b/diffusion_rs_common/src/nn/LICENSE similarity index 100% rename from diffuse_rs_common/src/nn/LICENSE rename to diffusion_rs_common/src/nn/LICENSE diff --git a/diffuse_rs_common/src/nn/activation.rs b/diffusion_rs_common/src/nn/activation.rs similarity index 100% rename from diffuse_rs_common/src/nn/activation.rs rename to diffusion_rs_common/src/nn/activation.rs diff --git a/diffuse_rs_common/src/nn/attention.rs b/diffusion_rs_common/src/nn/attention.rs similarity index 100% rename from diffuse_rs_common/src/nn/attention.rs rename to diffusion_rs_common/src/nn/attention.rs diff --git a/diffuse_rs_common/src/nn/batch_norm.rs b/diffusion_rs_common/src/nn/batch_norm.rs similarity index 100% rename from diffuse_rs_common/src/nn/batch_norm.rs rename to diffusion_rs_common/src/nn/batch_norm.rs diff --git a/diffuse_rs_common/src/nn/conv.rs b/diffusion_rs_common/src/nn/conv.rs similarity index 100% rename from diffuse_rs_common/src/nn/conv.rs rename to diffusion_rs_common/src/nn/conv.rs diff --git a/diffuse_rs_common/src/nn/embedding.rs b/diffusion_rs_common/src/nn/embedding.rs similarity index 100% rename from diffuse_rs_common/src/nn/embedding.rs rename to diffusion_rs_common/src/nn/embedding.rs diff --git a/diffuse_rs_common/src/nn/encoding.rs b/diffusion_rs_common/src/nn/encoding.rs similarity index 93% rename from diffuse_rs_common/src/nn/encoding.rs rename to diffusion_rs_common/src/nn/encoding.rs index 5875f9c..e5826b5 100644 --- a/diffuse_rs_common/src/nn/encoding.rs +++ b/diffusion_rs_common/src/nn/encoding.rs @@ -33,10 +33,10 @@ use crate::core::{DType, Result, Tensor, WithDType}; /// ## One-hot encoding /// /// ```rust -/// use diffuse_rs_common::core::{Shape, Tensor, Device}; -/// use diffuse_rs_common::nn::encoding::one_hot; +/// use diffusion_rs_common::core::{Shape, Tensor, Device}; +/// use diffusion_rs_common::nn::encoding::one_hot; /// -/// let device = diffuse_rs_common::core::Device::Cpu; +/// let device = diffusion_rs_common::core::Device::Cpu; /// /// let indices = Tensor::new(vec![vec![0i64, 2], vec![1, -1]], &device).unwrap(); /// let depth = 4; @@ -56,11 +56,11 @@ use crate::core::{DType, Result, Tensor, WithDType}; /// ## One-cold Encoding /// /// ```rust -/// use diffuse_rs_common::core::{Shape, Tensor, Device}; -/// use diffuse_rs_common::nn::encoding::one_hot; +/// use diffusion_rs_common::core::{Shape, Tensor, Device}; +/// use diffusion_rs_common::nn::encoding::one_hot; /// /// -/// let device = diffuse_rs_common::core::Device::Cpu; +/// let device = diffusion_rs_common::core::Device::Cpu; /// let depth = 4; /// let indices = Tensor::new(vec![vec![0u8, 2], vec![1, 3]], &device).unwrap(); /// let one_cold = one_hot(indices, depth, 0u8, 1u8).unwrap(); diff --git a/diffuse_rs_common/src/nn/func.rs b/diffusion_rs_common/src/nn/func.rs similarity index 100% rename from diffuse_rs_common/src/nn/func.rs rename to diffusion_rs_common/src/nn/func.rs diff --git a/diffuse_rs_common/src/nn/group_norm.rs b/diffusion_rs_common/src/nn/group_norm.rs similarity index 100% rename from diffuse_rs_common/src/nn/group_norm.rs rename to diffusion_rs_common/src/nn/group_norm.rs diff --git a/diffuse_rs_common/src/nn/init.rs b/diffusion_rs_common/src/nn/init.rs similarity index 100% 
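Stepping out of the rename stanzas for a moment: the `encoding.rs` hunk above exercises `one_hot(indices, depth, on_value, off_value)` in both its one-hot and one-cold forms. A compact sketch with rank-1 indices (the values are ours; the patch's own doc examples use nested vectors):

```rust
use diffusion_rs_common::core::{Device, Tensor};
use diffusion_rs_common::nn::encoding::one_hot;

fn main() -> diffusion_rs_common::core::Result<()> {
    let device = Device::Cpu;
    let indices = Tensor::new(&[0i64, 2, 1], &device)?;
    // Swapping on_value/off_value (1 <-> 0) would give the one-cold encoding instead.
    let hot = one_hot(indices, 4, 1i64, 0i64)?;
    assert_eq!(
        hot.to_vec2::<i64>()?,
        vec![vec![1, 0, 0, 0], vec![0, 0, 1, 0], vec![0, 1, 0, 0]]
    );
    Ok(())
}
```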
rename from diffuse_rs_common/src/nn/init.rs rename to diffusion_rs_common/src/nn/init.rs diff --git a/diffuse_rs_common/src/nn/kv_cache.rs b/diffusion_rs_common/src/nn/kv_cache.rs similarity index 100% rename from diffuse_rs_common/src/nn/kv_cache.rs rename to diffusion_rs_common/src/nn/kv_cache.rs diff --git a/diffuse_rs_common/src/nn/layer_norm.rs b/diffusion_rs_common/src/nn/layer_norm.rs similarity index 98% rename from diffuse_rs_common/src/nn/layer_norm.rs rename to diffusion_rs_common/src/nn/layer_norm.rs index c9fbb01..0a04cd5 100644 --- a/diffuse_rs_common/src/nn/layer_norm.rs +++ b/diffusion_rs_common/src/nn/layer_norm.rs @@ -7,9 +7,9 @@ //! # Example //! //! ```rust -//! use diffuse_rs_common::core::{Tensor, Device::Cpu, test_utils::to_vec3_round}; -//! use diffuse_rs_common::nn::{LayerNorm, Module}; -//! # fn main() -> diffuse_rs_common::core::Result<()> { +//! use diffusion_rs_common::core::{Tensor, Device::Cpu, test_utils::to_vec3_round}; +//! use diffusion_rs_common::nn::{LayerNorm, Module}; +//! # fn main() -> diffusion_rs_common::core::Result<()> { //! //! let w = Tensor::new(&[1f32, 1f32, 1f32], &Cpu)?; //! let b = Tensor::new(&[0f32, 0f32, 0f32], &Cpu)?; diff --git a/diffuse_rs_common/src/nn/linear.rs b/diffusion_rs_common/src/nn/linear.rs similarity index 94% rename from diffuse_rs_common/src/nn/linear.rs rename to diffusion_rs_common/src/nn/linear.rs index 8a34b88..003239d 100644 --- a/diffuse_rs_common/src/nn/linear.rs +++ b/diffusion_rs_common/src/nn/linear.rs @@ -6,9 +6,9 @@ //! output has shape `(b_sz, out_c)` and `(out_c,)` respectively. //! //! ```rust -//! use diffuse_rs_common::core::{Tensor, Device::Cpu}; -//! use diffuse_rs_common::nn::{Linear, Module}; -//! # fn main() -> diffuse_rs_common::core::Result<()> { +//! use diffusion_rs_common::core::{Tensor, Device::Cpu}; +//! use diffusion_rs_common::nn::{Linear, Module}; +//! # fn main() -> diffusion_rs_common::core::Result<()> { //! //! let w = Tensor::new(&[[1f32, 2.], [3., 4.], [5., 6.]], &Cpu)?; //! let layer = Linear::new(w, None); // Use no bias. diff --git a/diffuse_rs_common/src/nn/loss.rs b/diffusion_rs_common/src/nn/loss.rs similarity index 100% rename from diffuse_rs_common/src/nn/loss.rs rename to diffusion_rs_common/src/nn/loss.rs diff --git a/diffuse_rs_common/src/nn/mod.rs b/diffusion_rs_common/src/nn/mod.rs similarity index 100% rename from diffuse_rs_common/src/nn/mod.rs rename to diffusion_rs_common/src/nn/mod.rs diff --git a/diffuse_rs_common/src/nn/ops.rs b/diffusion_rs_common/src/nn/ops.rs similarity index 99% rename from diffuse_rs_common/src/nn/ops.rs rename to diffusion_rs_common/src/nn/ops.rs index bc90d6a..24eb8aa 100644 --- a/diffuse_rs_common/src/nn/ops.rs +++ b/diffusion_rs_common/src/nn/ops.rs @@ -8,16 +8,16 @@ use rayon::prelude::*; /// a slice of fixed index on dimension `dim` are between 0 and 1 and sum to 1. 
/// /// ```rust -/// use diffuse_rs_common::core::{Tensor, Device, test_utils::to_vec2_round}; +/// use diffusion_rs_common::core::{Tensor, Device, test_utils::to_vec2_round}; /// let a = Tensor::new(&[[0f32, 1., 0., 1.], [-2., 2., 3., -3.]], &Device::Cpu)?; -/// let a = diffuse_rs_common::nn::ops::softmax(&a, 1)?; +/// let a = diffusion_rs_common::nn::ops::softmax(&a, 1)?; /// assert_eq!( /// to_vec2_round(&a, 4)?, /// &[ /// [0.1345, 0.3655, 0.1345, 0.3655], /// [0.0049, 0.2671, 0.7262, 0.0018] /// ]); -/// # Ok::<(), diffuse_rs_common::core::Error>(()) +/// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn softmax(xs: &Tensor, dim: D) -> Result { let dim = dim.to_index(xs.shape(), "softmax")?; @@ -722,7 +722,7 @@ impl crate::core::CustomOp2 for AttnSoftmaxLastDim { /// Softmax with fused broadcast addition of a mask and scale. /// Equivalent to: /// ```ignore -/// diffuse_rs_common::nn::ops::softmax_last_dim(&(xs.broadcast_add(&mask)? * scale as f64)?)? +/// diffusion_rs_common::nn::ops::softmax_last_dim(&(xs.broadcast_add(&mask)? * scale as f64)?)? /// ``` /// - `xs` must be a rank-4 tensor /// - `mask` must be a rank-2 matrix diff --git a/diffuse_rs_common/src/nn/optim.rs b/diffusion_rs_common/src/nn/optim.rs similarity index 100% rename from diffuse_rs_common/src/nn/optim.rs rename to diffusion_rs_common/src/nn/optim.rs diff --git a/diffuse_rs_common/src/nn/rnn.rs b/diffusion_rs_common/src/nn/rnn.rs similarity index 100% rename from diffuse_rs_common/src/nn/rnn.rs rename to diffusion_rs_common/src/nn/rnn.rs diff --git a/diffuse_rs_common/src/nn/rope.rs b/diffusion_rs_common/src/nn/rope.rs similarity index 100% rename from diffuse_rs_common/src/nn/rope.rs rename to diffusion_rs_common/src/nn/rope.rs diff --git a/diffuse_rs_common/src/nn/rotary_emb.rs b/diffusion_rs_common/src/nn/rotary_emb.rs similarity index 100% rename from diffuse_rs_common/src/nn/rotary_emb.rs rename to diffusion_rs_common/src/nn/rotary_emb.rs diff --git a/diffuse_rs_common/src/nn/sequential.rs b/diffusion_rs_common/src/nn/sequential.rs similarity index 100% rename from diffuse_rs_common/src/nn/sequential.rs rename to diffusion_rs_common/src/nn/sequential.rs diff --git a/diffuse_rs_common/src/nn/tests/batch_norm.rs b/diffusion_rs_common/src/nn/tests/batch_norm.rs similarity index 98% rename from diffuse_rs_common/src/nn/tests/batch_norm.rs rename to diffusion_rs_common/src/nn/tests/batch_norm.rs index 2c033ad..84710d1 100644 --- a/diffuse_rs_common/src/nn/tests/batch_norm.rs +++ b/diffusion_rs_common/src/nn/tests/batch_norm.rs @@ -6,7 +6,7 @@ extern crate accelerate_src; use anyhow::Result; use crate::core::{test_utils, DType, Device, Tensor}; -use diffuse_rs_common::nn::{batch_norm, BatchNorm, BatchNormConfig, VarBuilder, VarMap}; +use diffusion_rs_common::nn::{batch_norm, BatchNorm, BatchNormConfig, VarBuilder, VarMap}; /* The test below has been generated using the following PyTorch code: import torch diff --git a/diffuse_rs_common/src/nn/tests/group_norm.rs b/diffusion_rs_common/src/nn/tests/group_norm.rs similarity index 98% rename from diffuse_rs_common/src/nn/tests/group_norm.rs rename to diffusion_rs_common/src/nn/tests/group_norm.rs index 1d8d5f5..0be76a1 100644 --- a/diffuse_rs_common/src/nn/tests/group_norm.rs +++ b/diffusion_rs_common/src/nn/tests/group_norm.rs @@ -27,7 +27,7 @@ extern crate accelerate_src; use anyhow::Result; use crate::core::test_utils::to_vec3_round; use crate::core::{Device, Tensor}; -use diffuse_rs_common::nn::{GroupNorm, Module}; +use 
diffusion_rs_common::nn::{GroupNorm, Module}; #[test] fn group_norm() -> Result<()> { diff --git a/diffuse_rs_common/src/nn/tests/kv_cache.rs b/diffusion_rs_common/src/nn/tests/kv_cache.rs similarity index 96% rename from diffuse_rs_common/src/nn/tests/kv_cache.rs rename to diffusion_rs_common/src/nn/tests/kv_cache.rs index 73fcb64..0e94175 100644 --- a/diffuse_rs_common/src/nn/tests/kv_cache.rs +++ b/diffusion_rs_common/src/nn/tests/kv_cache.rs @@ -8,7 +8,7 @@ use crate::core::{Device, Result, Tensor}; #[test] fn kv_cache() -> Result<()> { - let mut cache = diffuse_rs_common::nn::kv_cache::Cache::new(0, 16); + let mut cache = diffusion_rs_common::nn::kv_cache::Cache::new(0, 16); for _ in [0, 1] { assert_eq!(cache.current_seq_len(), 0); let data = cache.current_data()?; @@ -33,7 +33,7 @@ fn kv_cache() -> Result<()> { #[test] fn rotating_kv_cache() -> Result<()> { - let mut cache = diffuse_rs_common::nn::kv_cache::RotatingCache::new(0, 6); + let mut cache = diffusion_rs_common::nn::kv_cache::RotatingCache::new(0, 6); for _ in [0, 1] { assert_eq!(cache.offset(), 0); assert_eq!(cache.current_seq_len(), 0); diff --git a/diffuse_rs_common/src/nn/tests/layer_norm.rs b/diffusion_rs_common/src/nn/tests/layer_norm.rs similarity index 97% rename from diffuse_rs_common/src/nn/tests/layer_norm.rs rename to diffusion_rs_common/src/nn/tests/layer_norm.rs index e09704d..487dfd3 100644 --- a/diffuse_rs_common/src/nn/tests/layer_norm.rs +++ b/diffusion_rs_common/src/nn/tests/layer_norm.rs @@ -6,7 +6,7 @@ extern crate accelerate_src; use anyhow::Result; use crate::core::{test_utils, Device, Tensor}; -use diffuse_rs_common::nn::{LayerNorm, Module}; +use diffusion_rs_common::nn::{LayerNorm, Module}; #[test] fn layer_norm() -> Result<()> { diff --git a/diffuse_rs_common/src/nn/tests/loss.rs b/diffusion_rs_common/src/nn/tests/loss.rs similarity index 87% rename from diffuse_rs_common/src/nn/tests/loss.rs rename to diffusion_rs_common/src/nn/tests/loss.rs index 54947c8..99a1a9c 100644 --- a/diffuse_rs_common/src/nn/tests/loss.rs +++ b/diffusion_rs_common/src/nn/tests/loss.rs @@ -32,10 +32,10 @@ fn nll_and_cross_entropy() -> Result<()> { )?; let target = Tensor::new(&[1u32, 0, 4], &cpu)?; - let log_softmax = diffuse_rs_common::nn::ops::log_softmax(&input, 1)?; - let loss = diffuse_rs_common::nn::loss::nll(&log_softmax, &target)?; + let log_softmax = diffusion_rs_common::nn::ops::log_softmax(&input, 1)?; + let loss = diffusion_rs_common::nn::loss::nll(&log_softmax, &target)?; assert_eq!(to_vec0_round(&loss, 4)?, 1.1312); - let loss = diffuse_rs_common::nn::loss::cross_entropy(&input, &target)?; + let loss = diffusion_rs_common::nn::loss::cross_entropy(&input, &target)?; assert_eq!(to_vec0_round(&loss, 4)?, 1.1312); Ok(()) } @@ -81,7 +81,7 @@ fn binary_cross_entropy_with_logit() -> Result<()> { let inp = Tensor::new(&inp, &cpu)?; let target = Tensor::new(&target, &cpu)?; - let loss = diffuse_rs_common::nn::loss::binary_cross_entropy_with_logit(&inp, &target)?; + let loss = diffusion_rs_common::nn::loss::binary_cross_entropy_with_logit(&inp, &target)?; assert_eq!(to_vec0_round(&loss, 4)?, 0.8224); Ok(()) diff --git a/diffuse_rs_common/src/nn/tests/one_hot.rs b/diffusion_rs_common/src/nn/tests/one_hot.rs similarity index 98% rename from diffuse_rs_common/src/nn/tests/one_hot.rs rename to diffusion_rs_common/src/nn/tests/one_hot.rs index 022de9c..00a2264 100644 --- a/diffuse_rs_common/src/nn/tests/one_hot.rs +++ b/diffusion_rs_common/src/nn/tests/one_hot.rs @@ -1,5 +1,5 @@ use crate::core::{Result, Shape, Tensor}; 
-use diffuse_rs_common::nn::encoding::one_hot; +use diffusion_rs_common::nn::encoding::one_hot; #[test] fn test_i64_one_hot() -> Result<()> { diff --git a/diffuse_rs_common/src/nn/tests/ops.rs b/diffusion_rs_common/src/nn/tests/ops.rs similarity index 84% rename from diffuse_rs_common/src/nn/tests/ops.rs rename to diffusion_rs_common/src/nn/tests/ops.rs index 5cec5fd..8139b54 100644 --- a/diffuse_rs_common/src/nn/tests/ops.rs +++ b/diffusion_rs_common/src/nn/tests/ops.rs @@ -9,9 +9,9 @@ use crate::core::{test_device, test_utils::to_vec3_round, Device, Result, Tensor fn softmax(device: &Device) -> Result<()> { let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]]; let tensor = Tensor::new(data, device)?; - let t0 = diffuse_rs_common::nn::ops::softmax(&tensor.log()?, 0)?; - let t1 = diffuse_rs_common::nn::ops::softmax(&tensor.log()?, 1)?; - let t2 = diffuse_rs_common::nn::ops::softmax(&tensor.log()?, 2)?; + let t0 = diffusion_rs_common::nn::ops::softmax(&tensor.log()?, 0)?; + let t1 = diffusion_rs_common::nn::ops::softmax(&tensor.log()?, 1)?; + let t2 = diffusion_rs_common::nn::ops::softmax(&tensor.log()?, 2)?; assert_eq!( to_vec3_round(&t0, 4)?, &[ @@ -39,7 +39,7 @@ fn softmax(device: &Device) -> Result<()> { [[0.2, 0.1, 0.7], [0.4444, 0.1111, 0.4444]] ] ); - let t2 = diffuse_rs_common::nn::ops::softmax_last_dim(&tensor.log()?)?; + let t2 = diffusion_rs_common::nn::ops::softmax_last_dim(&tensor.log()?)?; assert_eq!( to_vec3_round(&t2, 4)?, &[ @@ -55,7 +55,7 @@ fn softmax(device: &Device) -> Result<()> { fn inplace_softmax(device: &Device) -> Result<()> { let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]]; let mut tensor = Tensor::new(data, device)?.log()?; - diffuse_rs_common::nn::ops::inplace_softmax_last_dim(&mut tensor)?; + diffusion_rs_common::nn::ops::inplace_softmax_last_dim(&mut tensor)?; assert_eq!( to_vec3_round(&tensor, 4)?, &[ @@ -72,7 +72,7 @@ fn rms_norm(device: &Device) -> Result<()> { let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]]; let tensor = Tensor::new(data, device)?; let alpha = Tensor::new(&[1f32, 2f32, 3f32], device)?; - let t = diffuse_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?; + let t = diffusion_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?; assert_eq!( to_vec3_round(&t, 4)?, &[ @@ -80,7 +80,7 @@ fn rms_norm(device: &Device) -> Result<()> { [[0.4714, 0.4714, 4.9497], [1.206, 0.603, 3.6181]] ] ); - let t2 = diffuse_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?; + let t2 = diffusion_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?; assert_eq!( to_vec3_round(&t2, 4)?, &[ @@ -102,8 +102,8 @@ fn rms_norml(device: &Device) -> Result<()> { let src: Vec = (0..el_count).map(|_| rng.gen::()).collect(); let tensor = Tensor::new(src, device)?.reshape((b_size, seq_len, head_dim))?; let alpha = Tensor::ones(head_dim, crate::core::DType::F32, device)?; - let t = diffuse_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?; - let t2 = diffuse_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?; + let t = diffusion_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?; + let t2 = diffusion_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?; let diff = (t - t2)? .abs()? .flatten_all()? 
@@ -102,8 +102,8 @@ fn rms_norml(device: &Device) -> Result<()> { let src: Vec<f32> = (0..el_count).map(|_| rng.gen::<f32>()).collect(); let tensor = Tensor::new(src, device)?.reshape((b_size, seq_len, head_dim))?; let alpha = Tensor::ones(head_dim, crate::core::DType::F32, device)?; - let t = diffuse_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?; - let t2 = diffuse_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?; + let t = diffusion_rs_common::nn::ops::rms_norm(&tensor, &alpha, 1e-5)?; + let t2 = diffusion_rs_common::nn::ops::rms_norm_slow(&tensor, &alpha, 1e-5)?; let diff = (t - t2)? .abs()? .flatten_all()? @@ -119,7 +119,7 @@ fn layer_norm(device: &Device) -> Result<()> { let tensor = Tensor::new(data, device)?; let alpha = Tensor::new(&[1f32, 2f32, 3f32], device)?; let beta = Tensor::new(&[0.5f32, 0f32, -0.2f32], device)?; - let t = diffuse_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?; + let t = diffusion_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?; assert_eq!( to_vec3_round(&t, 4)?, &[ @@ -127,7 +127,7 @@ fn layer_norm(device: &Device) -> Result<()> { [[-0.008, -1.778, 3.991], [1.2071, -2.8284, 1.9213]] ] ); - let t2 = diffuse_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?; + let t2 = diffusion_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?; assert_eq!( to_vec3_round(&t2, 4)?, &[ @@ -150,8 +150,8 @@ fn layer_norml(device: &Device) -> Result<()> { let tensor = Tensor::new(src, device)?.reshape((b_size, seq_len, head_dim))?; let alpha = Tensor::ones(head_dim, crate::core::DType::F32, device)?; let beta = Tensor::zeros(head_dim, crate::core::DType::F32, device)?; - let t = diffuse_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?; - let t2 = diffuse_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?; + let t = diffusion_rs_common::nn::ops::layer_norm(&tensor, &alpha, &beta, 1e-5)?; + let t2 = diffusion_rs_common::nn::ops::layer_norm_slow(&tensor, &alpha, &beta, 1e-5)?; let diff = (t - t2)? .abs()? .flatten_all()? @@ -166,7 +166,7 @@ fn softmax_numerical_stability() -> Result<()> { let dev = &Device::Cpu; let xs = Tensor::new(&[1234f32, 0.], dev)?; - let softmax = diffuse_rs_common::nn::ops::softmax(&xs, 0)?; + let softmax = diffusion_rs_common::nn::ops::softmax(&xs, 0)?; assert_eq!(softmax.to_vec1::<f32>()?, &[1f32, 0.]); Ok(()) } @@ -187,8 +187,8 @@ fn ropei(device: &Device) -> Result<()> { let src = Tensor::from_vec(src, (b_size, num_head, seq_len, head_dim), device)?; let cos = Tensor::from_vec(cos, (seq_len, head_dim / 2), device)?; let sin = Tensor::from_vec(sin, (seq_len, head_dim / 2), device)?; - let rope1 = diffuse_rs_common::nn::rotary_emb::rope_i(&src, &cos, &sin)?; - let rope2 = diffuse_rs_common::nn::rotary_emb::rope_i_slow(&src, &cos, &sin)?; + let rope1 = diffusion_rs_common::nn::rotary_emb::rope_i(&src, &cos, &sin)?; + let rope2 = diffusion_rs_common::nn::rotary_emb::rope_i_slow(&src, &cos, &sin)?; let sum_diff = (rope1 - rope2)?.abs()?.sum_all()?.to_vec0::<f32>()?; if device.is_cpu() { assert_eq!(sum_diff, 0.); @@ -214,8 +214,8 @@ fn rope(device: &Device) -> Result<()> { let src = Tensor::from_vec(src, (b_size, num_head, seq_len, head_dim), device)?; let cos = Tensor::from_vec(cos, (seq_len, head_dim / 2), device)?; let sin = Tensor::from_vec(sin, (seq_len, head_dim / 2), device)?; - let rope1 = diffuse_rs_common::nn::rotary_emb::rope(&src, &cos, &sin)?; - let rope2 = diffuse_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?; + let rope1 = diffusion_rs_common::nn::rotary_emb::rope(&src, &cos, &sin)?; + let rope2 = diffusion_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?; let sum_diff = (rope1 - rope2)?.abs()?.sum_all()?.to_vec0::<f32>()?; if device.is_cpu() { assert_eq!(sum_diff, 0.);
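rope_i/rope_i_slow above apply rotary embeddings in the interleaved layout (adjacent feature pairs), while rope/rope_slow rotate pairs split across the two halves of the head dimension; both reduce to the same two-dimensional rotation per pair. A sketch of that per-pair arithmetic, assuming the usual RoPE definition the slow reference paths implement:

```rust
// Rotate one feature pair by the position-dependent angle whose cosine/sine
// come from the precomputed `cos`/`sin` tables built in the tests above.
fn rotate_pair(x0: f32, x1: f32, cos: f32, sin: f32) -> (f32, f32) {
    (x0 * cos - x1 * sin, x0 * sin + x1 * cos)
}
```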
@@ -243,9 +243,9 @@ fn rope_thd(device: &Device) -> Result<()> { let sin = Tensor::from_vec(sin, (seq_len, head_dim / 2), device)?; let rope1 = { let src = src.transpose(1, 2)?.contiguous()?; - diffuse_rs_common::nn::rotary_emb::rope_thd(&src, &cos, &sin)?.transpose(1, 2)? + diffusion_rs_common::nn::rotary_emb::rope_thd(&src, &cos, &sin)?.transpose(1, 2)? }; - let rope2 = diffuse_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?; + let rope2 = diffusion_rs_common::nn::rotary_emb::rope_slow(&src, &cos, &sin)?; let sum_diff = (rope1 - rope2)?.abs()?.sum_all()?.to_vec0::<f32>()?; if device.is_cpu() { assert_eq!(sum_diff, 0.); @@ -258,7 +258,7 @@ fn rope_thd(device: &Device) -> Result<()> { fn sigmoid(device: &Device) -> Result<()> { let data = &[[[3f32, 1., 4.], [1., 5., 9.]], [[2., 1., 7.], [8., 2., 8.]]]; let tensor = Tensor::new(data, device)?; - let s1 = diffuse_rs_common::nn::ops::sigmoid(&tensor)?; + let s1 = diffusion_rs_common::nn::ops::sigmoid(&tensor)?; let s2 = (1. / (1. + tensor.neg()?.exp()?)?)?; let diff = (s1 - s2)?.abs()?.sum_all()?.to_vec0::<f32>()?; assert_eq!(diff, 0.); diff --git a/diffuse_rs_common/src/nn/tests/optim.rs b/diffusion_rs_common/src/nn/tests/optim.rs similarity index 96% rename from diffuse_rs_common/src/nn/tests/optim.rs rename to diffusion_rs_common/src/nn/tests/optim.rs index f7cff38..8958b36 100644 --- a/diffuse_rs_common/src/nn/tests/optim.rs +++ b/diffusion_rs_common/src/nn/tests/optim.rs @@ -8,7 +8,7 @@ use crate::core::test_utils::{to_vec0_round, to_vec2_round}; use anyhow::Result; use crate::core::{DType, Device, Tensor, Var}; -use diffuse_rs_common::nn::{AdamW, Linear, Module, Optimizer, ParamsAdamW, SGD}; +use diffusion_rs_common::nn::{AdamW, Linear, Module, Optimizer, ParamsAdamW, SGD}; #[test] fn sgd_optim() -> Result<()> { @@ -124,7 +124,7 @@ fn adamw_linear_regression() -> Result<()> { #[test] fn adamw_linear_regression_varmap() -> Result<()> { - use diffuse_rs_common::nn::Init::Const; + use diffusion_rs_common::nn::Init::Const; // Similar as the previous test but using a VarMap. let w_gen = Tensor::new(&[[3f32, 1.]], &Device::Cpu)?; @@ -133,7 +133,7 @@ fn adamw_linear_regression_varmap() -> Result<()> { let sample_xs = Tensor::new(&[[2f32, 1.], [7., 4.], [-4., 12.], [5., 8.]], &Device::Cpu)?; let sample_ys = gen.forward(&sample_xs)?; - let mut var_map = diffuse_rs_common::nn::VarMap::new(); + let mut var_map = diffusion_rs_common::nn::VarMap::new(); let w = var_map.get((1, 2), "w", Const(0.), DType::F32, &Device::Cpu)?; let b = var_map.get((), "b", Const(0.), DType::F32, &Device::Cpu)?; diff --git a/diffuse_rs_common/src/nn/tests/rnn.rs b/diffusion_rs_common/src/nn/tests/rnn.rs similarity index 91% rename from diffuse_rs_common/src/nn/tests/rnn.rs rename to diffusion_rs_common/src/nn/tests/rnn.rs index d50fbcf..10c3d5c 100644 --- a/diffuse_rs_common/src/nn/tests/rnn.rs +++ b/diffusion_rs_common/src/nn/tests/rnn.rs @@ -5,7 +5,7 @@ extern crate intel_mkl_src; extern crate accelerate_src; use crate::core::{test_utils::to_vec2_round, DType, Device, Result, Tensor}; -use diffuse_rs_common::nn::RNN; +use diffusion_rs_common::nn::RNN; /* The following test can be verified against PyTorch using the following snippet. import torch @@ -42,8 +42,8 @@ fn lstm() -> Result<()> { ] .into_iter() .collect(); - let vb = diffuse_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu); - let lstm = diffuse_rs_common::nn::lstm(2, 3, Default::default(), vb)?; + let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu); + let lstm = diffusion_rs_common::nn::lstm(2, 3, Default::default(), vb)?; let mut state = lstm.zero_state(1)?; for inp in [3f32, 1., 4., 1., 5., 9., 2.]
{ let inp = Tensor::new(&[[inp, inp * 0.5]], cpu)?; @@ -88,8 +88,8 @@ fn gru() -> Result<()> { ] .into_iter() .collect(); - let vb = diffuse_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu); - let gru = diffuse_rs_common::nn::gru(2, 3, Default::default(), vb)?; + let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, cpu); + let gru = diffusion_rs_common::nn::gru(2, 3, Default::default(), vb)?; let mut state = gru.zero_state(1)?; for inp in [3f32, 1., 4., 1., 5., 9., 2.] { let inp = Tensor::new(&[[inp, inp * 0.5]], cpu)?; diff --git a/diffuse_rs_common/src/nn/tests/sdpa.rs b/diffusion_rs_common/src/nn/tests/sdpa.rs similarity index 83% rename from diffuse_rs_common/src/nn/tests/sdpa.rs rename to diffusion_rs_common/src/nn/tests/sdpa.rs index a89a562..53c0787 100644 --- a/diffuse_rs_common/src/nn/tests/sdpa.rs +++ b/diffusion_rs_common/src/nn/tests/sdpa.rs @@ -20,12 +20,12 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? + let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? .to_dtype(q.dtype())?; att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -58,12 +58,12 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? + let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? .to_dtype(q.dtype())?; att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -96,12 +96,12 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? + let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? .to_dtype(q.dtype())?; att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -136,7 +136,7 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim( + let att = diffusion_rs_common::nn::ops::softmax_last_dim( &att.to_dtype(DType::F32)? .div(SOFTCAP)? .tanh()? @@ -146,7 +146,7 @@ mod metal_sdpa_tests { att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -181,7 +181,7 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim( + let att = diffusion_rs_common::nn::ops::softmax_last_dim( &att.to_dtype(DType::F32)? .div(SOFTCAP)? .tanh()? 
@@ -191,7 +191,7 @@ mod metal_sdpa_tests { att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -226,7 +226,7 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim( + let att = diffusion_rs_common::nn::ops::softmax_last_dim( &att.to_dtype(DType::F32)? .div(SOFTCAP)? .tanh()? @@ -236,7 +236,7 @@ mod metal_sdpa_tests { att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, SOFTCAP as f32)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -269,12 +269,12 @@ mod metal_sdpa_tests { let ground_truth = { let att = (q.clone() * scale)?.matmul(&k.clone().t()?)?; - let att = diffuse_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? + let att = diffusion_rs_common::nn::ops::softmax_last_dim(&att.to_dtype(DType::F32)?)? .to_dtype(q.dtype())?; att.matmul(&v.clone())? }; - let sdpa_output = diffuse_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; + let sdpa_output = diffusion_rs_common::nn::ops::sdpa(&q, &k, &v, scale as f32, 1.)?; assert_eq!(ground_truth.shape(), sdpa_output.shape()); @@ -296,9 +296,9 @@ mod metal_sdpa_tests { let tensor = Tensor::randn(0f32, 1f32, (4, 32, 64, 64), &device)?; let truemask = Tensor::full(f32::MIN, (64, 64), &device)?.contiguous()?; - let ground_truth = diffuse_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?; + let ground_truth = diffusion_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?; - let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?; + let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?; let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)? .sum_all()? @@ -323,10 +323,10 @@ mod metal_sdpa_tests { let scale = 0.1f32; let ground_truth = - diffuse_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)? + diffusion_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)? .to_dtype(DType::F32)?; - let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)? + let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)? .to_dtype(DType::F32)?; let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)? @@ -348,9 +348,9 @@ mod metal_sdpa_tests { let tensor = Tensor::randn(0f32, 1f32, (4, 32, 64, 63), &device)?; let truemask = Tensor::full(f32::MIN, (64, 63), &device)?.contiguous()?; - let ground_truth = diffuse_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?; + let ground_truth = diffusion_rs_common::nn::ops::softmax_last_dim(&tensor.broadcast_add(&truemask)?)?; - let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?; + let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, 1.)?; let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)? .sum_all()? 
@@ -375,10 +375,10 @@ mod metal_sdpa_tests { let scale = 0.1f32; let ground_truth = - diffuse_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)? + diffusion_rs_common::nn::ops::softmax_last_dim(&(tensor.broadcast_add(&truemask)? * scale as f64)?)? .to_dtype(DType::F32)?; - let softmax_out = diffuse_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)? + let softmax_out = diffusion_rs_common::nn::ops::attn_softmax_last_dim(&tensor, &truemask, scale)? .to_dtype(DType::F32)?; let error: f32 = ((&ground_truth - &softmax_out)?.abs()? / &ground_truth.abs()?)? diff --git a/diffuse_rs_common/src/nn/var_builder.rs b/diffusion_rs_common/src/nn/var_builder.rs similarity index 99% rename from diffuse_rs_common/src/nn/var_builder.rs rename to diffusion_rs_common/src/nn/var_builder.rs index 16dae4c..15d7e92 100644 --- a/diffuse_rs_common/src/nn/var_builder.rs +++ b/diffusion_rs_common/src/nn/var_builder.rs @@ -650,7 +650,7 @@ impl<'a> VarBuilder<'a> { /// passing the new names to the inner VarBuilder. /// /// ```rust - /// use diffuse_rs_common::core::{Tensor, DType, Device}; + /// use diffusion_rs_common::core::{Tensor, DType, Device}; /// /// let a = Tensor::arange(0f32, 6f32, &Device::Cpu)?.reshape((2, 3))?; /// let tensors: std::collections::HashMap<_, _> = [ @@ -658,7 +658,7 @@ impl<'a> VarBuilder<'a> { /// ] /// .into_iter() /// .collect(); - /// let vb = diffuse_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, &Device::Cpu); + /// let vb = diffusion_rs_common::nn::VarBuilder::from_tensors(tensors, DType::F32, &Device::Cpu); /// assert!(vb.contains_tensor("foo")); /// assert!(vb.get((2, 3), "foo").is_ok()); /// assert!(!vb.contains_tensor("bar")); @@ -668,7 +668,7 @@ impl<'a> VarBuilder<'a> { /// assert!(vb.get((2, 3), "bar").is_ok()); /// assert!(vb.get((2, 3), "foo").is_ok()); /// assert!(!vb.contains_tensor("baz")); - /// # Ok::<(), diffuse_rs_common::core::Error>(()) + /// # Ok::<(), diffusion_rs_common::core::Error>(()) /// ``` pub fn rename_f<F: Fn(&str) -> String + Sync + Send + 'static>(self, f: F) -> Self { let f: Box<dyn Fn(&str) -> String + Sync + Send + 'static> = Box::new(f);
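The diff above elides the middle of the rename_f doc-test (the jump from line 658 to 668); the omitted step is presumably a rename call along these lines, which would explain why "foo" stops resolving and "bar" starts — a hypothetical reconstruction, not text from the patch:

```rust
// Rewrite every tensor path on lookup; afterwards `vb.get((2, 3), "bar")`
// finds the tensor originally registered as "foo". (Assumed call, shown for context.)
let vb = vb.rename_f(|name: &str| name.replace("foo", "bar"));
```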
diff --git a/diffuse_rs_common/src/nn/var_map.rs b/diffusion_rs_common/src/nn/var_map.rs similarity index 100% rename from diffuse_rs_common/src/nn/var_map.rs rename to diffusion_rs_common/src/nn/var_map.rs diff --git a/diffuse_rs_common/src/nn_wrap.rs b/diffusion_rs_common/src/nn_wrap.rs similarity index 100% rename from diffuse_rs_common/src/nn_wrap.rs rename to diffusion_rs_common/src/nn_wrap.rs diff --git a/diffuse_rs_common/src/progress.rs b/diffusion_rs_common/src/progress.rs similarity index 100% rename from diffuse_rs_common/src/progress.rs rename to diffusion_rs_common/src/progress.rs diff --git a/diffuse_rs_common/src/safetensors.rs b/diffusion_rs_common/src/safetensors.rs similarity index 100% rename from diffuse_rs_common/src/safetensors.rs rename to diffusion_rs_common/src/safetensors.rs diff --git a/diffuse_rs_common/src/tokenizer.rs b/diffusion_rs_common/src/tokenizer.rs similarity index 100% rename from diffuse_rs_common/src/tokenizer.rs rename to diffusion_rs_common/src/tokenizer.rs diff --git a/diffuse_rs_common/src/tokens.rs b/diffusion_rs_common/src/tokens.rs similarity index 100% rename from diffuse_rs_common/src/tokens.rs rename to diffusion_rs_common/src/tokens.rs diff --git a/diffuse_rs_common/src/varbuilder.rs b/diffusion_rs_common/src/varbuilder.rs similarity index 100% rename from diffuse_rs_common/src/varbuilder.rs rename to diffusion_rs_common/src/varbuilder.rs diff --git a/diffuse_rs_common/src/varbuilder_loading.rs b/diffusion_rs_common/src/varbuilder_loading.rs similarity index 100% rename from diffuse_rs_common/src/varbuilder_loading.rs rename to diffusion_rs_common/src/varbuilder_loading.rs diff --git a/diffuse_rs_core/Cargo.toml b/diffusion_rs_core/Cargo.toml similarity index 59% rename from diffuse_rs_core/Cargo.toml rename to diffusion_rs_core/Cargo.toml index 405bb50..330c701 100644 --- a/diffuse_rs_core/Cargo.toml +++ b/diffusion_rs_core/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "diffuse_rs_core" +name = "diffusion_rs_core" readme.workspace = true authors.workspace = true version.workspace = true edition.workspace = true -description = "Core package of diffuse_rs" +description = "Core package of diffusion_rs" repository.workspace = true keywords.workspace = true categories.workspace = true @@ -16,8 +16,8 @@ anyhow.workspace = true float8.workspace = true half.workspace = true hf-hub.workspace = true -diffuse_rs_backend = { path = "../diffuse_rs_backend" } -diffuse_rs_common = { path = "../diffuse_rs_common" } +diffusion_rs_backend = { path = "../diffusion_rs_backend" } +diffusion_rs_common = { path = "../diffusion_rs_common" } serde.workspace = true serde_plain.workspace = true serde_json.workspace = true @@ -31,8 +31,8 @@ objc = { workspace = true, optional = true } clap.workspace = true [features] -cuda = ["diffuse_rs_common/cuda", "diffuse_rs_backend/cuda"] -cudnn = ["diffuse_rs_common/cudnn"] -metal = ["diffuse_rs_common/metal", "diffuse_rs_backend/metal", "dep:objc"] -accelerate = ["diffuse_rs_common/accelerate"] -mkl = ["diffuse_rs_common/mkl"] +cuda = ["diffusion_rs_common/cuda", "diffusion_rs_backend/cuda"] +cudnn = ["diffusion_rs_common/cudnn"] +metal = ["diffusion_rs_common/metal", "diffusion_rs_backend/metal", "dep:objc"] +accelerate = ["diffusion_rs_common/accelerate"] +mkl = ["diffusion_rs_common/mkl"] diff --git a/diffuse_rs_core/src/lib.rs b/diffusion_rs_core/src/lib.rs similarity index 81% rename from diffuse_rs_core/src/lib.rs rename to diffusion_rs_core/src/lib.rs index 0acf129..0575831 100644 --- a/diffuse_rs_core/src/lib.rs +++ b/diffusion_rs_core/src/lib.rs @@ -1,11 +1,11 @@ -//! Core crate for interacting with diffuse_rs. +//! Core crate for interacting with diffusion_rs. //! //! The API is intentionally straightforward but strives to provide strong flexibility. //! //! ```rust,no_run //! use std::time::Instant; //! -//! use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; +//! use diffusion_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; //! //! let pipeline = Pipeline::load( //!
ModelSource::dduf("FLUX.1-dev-Q4-bnb.dduf")?, @@ -38,5 +38,5 @@ mod models; mod pipelines; -pub use diffuse_rs_common::{ModelSource, TokenSource}; +pub use diffusion_rs_common::{ModelSource, TokenSource}; pub use pipelines::{DiffusionGenerationParams, Offloading, Pipeline};
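The crate-level doc example above is only partially visible in this hunk. For orientation, a fuller sketch of the same flow; everything past the first Pipeline::load argument (the silent flag, token source, and the trailing overrides), the Default impl on DiffusionGenerationParams, and the returned image type are assumptions here, not confirmed by this patch:

```rust
use diffusion_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource};

fn main() -> anyhow::Result<()> {
    // Load a DDUF package; the trailing arguments are assumed from the crate's README.
    let pipeline = Pipeline::load(
        ModelSource::dduf("FLUX.1-dev-Q4-bnb.dduf")?,
        false,
        TokenSource::CacheToken,
        None,
        None,
        None,
    )?;
    // forward takes the prompts, the generation parameters, and an optional
    // offloading mode (mirroring the ModelPipeline trait in pipelines/mod.rs),
    // and is assumed to hand back the decoded images.
    let images = pipeline.forward(
        vec!["Draw a picture of a sunrise.".to_string()],
        DiffusionGenerationParams::default(), // concrete fields are not shown in this patch
        Some(Offloading::Full),
    )?;
    images[0].save("image.png")?;
    Ok(())
}
```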
diff --git a/diffuse_rs_core/src/models/clip/mod.rs b/diffusion_rs_core/src/models/clip/mod.rs similarity index 100% rename from diffuse_rs_core/src/models/clip/mod.rs rename to diffusion_rs_core/src/models/clip/mod.rs diff --git a/diffuse_rs_core/src/models/clip/text.rs b/diffusion_rs_core/src/models/clip/text.rs similarity index 78% rename from diffuse_rs_core/src/models/clip/text.rs rename to diffusion_rs_core/src/models/clip/text.rs index 4fd3448..298e83c 100644 --- a/diffuse_rs_core/src/models/clip/text.rs +++ b/diffusion_rs_core/src/models/clip/text.rs @@ -1,7 +1,7 @@ #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] -use diffuse_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D}; -use diffuse_rs_common::nn::{ops::sigmoid, Module}; +use diffusion_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D}; +use diffusion_rs_common::nn::{ops::sigmoid, Module}; use serde::Deserialize; #[derive(Debug, Clone, Copy, Deserialize)] @@ -33,16 +33,19 @@ pub struct ClipTextConfig { // TODO rewrite to be more similar to https://github.com/huggingface/transformers/blob/f6fa0f0bf0796ac66f201f23bdb8585de1609add/src/transformers/models/clip/modeling_clip.py#L142 #[derive(Clone, Debug)] struct ClipTextEmbeddings { - token_embedding: diffuse_rs_common::nn::Embedding, - position_embedding: diffuse_rs_common::nn::Embedding, + token_embedding: diffusion_rs_common::nn::Embedding, + position_embedding: diffusion_rs_common::nn::Embedding, position_ids: Tensor, } impl ClipTextEmbeddings { - fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { - let token_embedding = - diffuse_rs_common::embedding(c.vocab_size, c.projection_dim, vs.pp("token_embedding"))?; - let position_embedding = diffuse_rs_common::embedding( + fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { + let token_embedding = diffusion_rs_common::embedding( + c.vocab_size, + c.projection_dim, + vs.pp("token_embedding"), + )?; + let position_embedding = diffusion_rs_common::embedding( c.max_position_embeddings, c.projection_dim, vs.pp("position_embedding"), @@ -69,24 +72,24 @@ impl Module for ClipTextEmbeddings { #[derive(Clone, Debug)] struct ClipAttention { - k_proj: diffuse_rs_common::nn::Linear, - v_proj: diffuse_rs_common::nn::Linear, - q_proj: diffuse_rs_common::nn::Linear, - out_proj: diffuse_rs_common::nn::Linear, + k_proj: diffusion_rs_common::nn::Linear, + v_proj: diffusion_rs_common::nn::Linear, + q_proj: diffusion_rs_common::nn::Linear, + out_proj: diffusion_rs_common::nn::Linear, head_dim: usize, scale: f64, num_attention_heads: usize, } impl ClipAttention { - fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { + fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { let projection_dim = c.projection_dim; let num_attention_heads = c.num_attention_heads; - let k_proj = diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("k_proj"))?; - let v_proj = diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("v_proj"))?; - let q_proj = diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("q_proj"))?; + let k_proj = diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("k_proj"))?; + let v_proj = diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("v_proj"))?; + let q_proj = diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("q_proj"))?; let out_proj = - diffuse_rs_common::linear(projection_dim, projection_dim, vs.pp("out_proj"))?; + diffusion_rs_common::linear(projection_dim, projection_dim, vs.pp("out_proj"))?; let head_dim = projection_dim / num_attention_heads; let scale = (head_dim as f64).powf(-0.5); @@ -138,7 +141,7 @@ impl ClipAttention { attn_weights }; - let attn_weights = diffuse_rs_common::nn::ops::softmax(&attn_weights, D::Minus1)?; + let attn_weights = diffusion_rs_common::nn::ops::softmax(&attn_weights, D::Minus1)?; let attn_output = attn_weights.matmul(&value_states)?.to_dtype(in_dtype)?; let attn_output = attn_output @@ -151,15 +154,15 @@ impl ClipAttention { #[derive(Clone, Debug)] struct ClipMlp { - fc1: diffuse_rs_common::nn::Linear, - fc2: diffuse_rs_common::nn::Linear, + fc1: diffusion_rs_common::nn::Linear, + fc2: diffusion_rs_common::nn::Linear, activation: Activation, } impl ClipMlp { - fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { - let fc1 = diffuse_rs_common::linear(c.projection_dim, c.intermediate_size, vs.pp("fc1"))?; - let fc2 = diffuse_rs_common::linear(c.intermediate_size, c.projection_dim, vs.pp("fc2"))?; + fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { + let fc1 = diffusion_rs_common::linear(c.projection_dim, c.intermediate_size, vs.pp("fc1"))?; + let fc2 = diffusion_rs_common::linear(c.intermediate_size, c.projection_dim, vs.pp("fc2"))?; Ok(ClipMlp { fc1, @@ -179,19 +182,19 @@ impl ClipMlp { #[derive(Clone, Debug)] struct ClipEncoderLayer { self_attn: ClipAttention, - layer_norm1: diffuse_rs_common::nn::LayerNorm, + layer_norm1: diffusion_rs_common::nn::LayerNorm, mlp: ClipMlp, - layer_norm2: diffuse_rs_common::nn::LayerNorm, + layer_norm2: diffusion_rs_common::nn::LayerNorm, } impl ClipEncoderLayer { - fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { + fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { let self_attn = ClipAttention::new(vs.pp("self_attn"), c)?; let layer_norm1 = - diffuse_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm1"))?; + diffusion_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm1"))?; let mlp = ClipMlp::new(vs.pp("mlp"), c)?; let layer_norm2 = - diffuse_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm2"))?; + diffusion_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("layer_norm2"))?; Ok(ClipEncoderLayer { self_attn, @@ -220,7 +223,7 @@ struct ClipEncoder { } impl ClipEncoder { - pub fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { + pub fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { let vs = vs.pp("layers"); let mut layers: Vec<ClipEncoderLayer> = Vec::new(); for index in 0..c.num_hidden_layers { @@ -244,16 +247,16 @@ pub struct ClipTextTransformer { embeddings: ClipTextEmbeddings, encoder: ClipEncoder, - final_layer_norm: diffuse_rs_common::nn::LayerNorm, + final_layer_norm: diffusion_rs_common::nn::LayerNorm, device: Device, } impl ClipTextTransformer { - pub fn new(vs: diffuse_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { + pub fn new(vs: diffusion_rs_common::VarBuilder, c: &ClipTextConfig) -> Result<Self> { let embeddings = ClipTextEmbeddings::new(vs.pp("embeddings"), c)?; let encoder = ClipEncoder::new(vs.pp("encoder"), c)?; let final_layer_norm = -
diffuse_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("final_layer_norm"))?; + diffusion_rs_common::layer_norm(c.projection_dim, 1e-5, vs.pp("final_layer_norm"))?; Ok(ClipTextTransformer { embeddings, encoder, diff --git a/diffuse_rs_core/src/models/flux/mod.rs b/diffusion_rs_core/src/models/flux/mod.rs similarity index 100% rename from diffuse_rs_core/src/models/flux/mod.rs rename to diffusion_rs_core/src/models/flux/mod.rs diff --git a/diffuse_rs_core/src/models/flux/model.rs b/diffusion_rs_core/src/models/flux/model.rs similarity index 90% rename from diffuse_rs_core/src/models/flux/model.rs rename to diffusion_rs_core/src/models/flux/model.rs index 176585a..0544237 100644 --- a/diffuse_rs_core/src/models/flux/model.rs +++ b/diffusion_rs_core/src/models/flux/model.rs @@ -2,13 +2,13 @@ use std::sync::Arc; -use diffuse_rs_backend::{QuantMethod, QuantizedConfig}; -use diffuse_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D}; -use diffuse_rs_common::nn::{layer_norm::RmsNormNonQuantized, LayerNorm, RmsNorm}; -use diffuse_rs_common::VarBuilder; +use diffusion_rs_backend::{QuantMethod, QuantizedConfig}; +use diffusion_rs_common::core::{DType, Device, IndexOp, Result, Tensor, D}; +use diffusion_rs_common::nn::{layer_norm::RmsNormNonQuantized, LayerNorm, RmsNorm}; +use diffusion_rs_common::VarBuilder; use serde::Deserialize; -use diffuse_rs_common::NiceProgressBar; +use diffusion_rs_common::NiceProgressBar; use tracing::{span, Span}; use crate::models::{QuantizedModel, QuantizedModelLayer}; @@ -40,7 +40,7 @@ fn layer_norm(dim: usize, vb: VarBuilder) -> Result<LayerNorm> { fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Tensor> { let dim = q.dim(D::Minus1)?; let scale_factor = 1.0 / (dim as f64).sqrt(); - diffuse_rs_backend::ops::sdpa( + diffusion_rs_backend::ops::sdpa( &q.to_dtype(DType::F32)?, &k.to_dtype(DType::F32)?, &v.to_dtype(DType::F32)?, @@ -56,7 +56,7 @@ fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Tensor> } fn rope(pos: &Tensor, dim: usize, theta: usize) -> Result<Tensor> { if dim % 2 == 1 { - diffuse_rs_common::bail!("dim {dim} is odd") + diffusion_rs_common::bail!("dim {dim} is odd") } let dev = pos.device(); let theta = theta as f64; @@ -105,17 +105,17 @@ fn timestep_embedding(t: &Tensor, dim: usize, dtype: DType) -> Result<Tensor> { const TIME_FACTOR: f64 = 1000.; const MAX_PERIOD: f64 = 10000.; if dim % 2 == 1 { - diffuse_rs_common::bail!("{dim} is odd") + diffusion_rs_common::bail!("{dim} is odd") } let dev = t.device(); let half = dim / 2; let t = (t * TIME_FACTOR)?; let arange = - Tensor::arange(0, half as u32, dev)?.to_dtype(diffuse_rs_common::core::DType::F32)?; + Tensor::arange(0, half as u32, dev)?.to_dtype(diffusion_rs_common::core::DType::F32)?; let freqs = (arange * (-MAX_PERIOD.ln() / half as f64))?.exp()?; let args = t .unsqueeze(1)? - .to_dtype(diffuse_rs_common::core::DType::F32)? + .to_dtype(diffusion_rs_common::core::DType::F32)? .broadcast_mul(&freqs.unsqueeze(0)?)?; let emb = Tensor::cat(&[args.cos()?, args.sin()?], D::Minus1)?.to_dtype(dtype)?; Ok(emb) }
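timestep_embedding above is the standard sinusoidal embedding used by diffusion transformers; a scalar sketch of the same arithmetic for reference (the tensor code broadcasts this over a batch of timesteps; `timestep_embedding_ref` is illustrative, not part of the crate):

```rust
// t is pre-scaled by TIME_FACTOR (1000); frequencies decay as
// f_k = exp(-ln(10000) * k / half); the result concatenates cosines then sines.
fn timestep_embedding_ref(t: f32, dim: usize) -> Vec<f32> {
    let half = dim / 2;
    let t = t * 1000.0;
    let freqs: Vec<f32> = (0..half)
        .map(|k| (-(10000.0f32).ln() * k as f32 / half as f32).exp())
        .collect();
    let mut emb: Vec<f32> = freqs.iter().map(|f| (t * f).cos()).collect();
    emb.extend(freqs.iter().map(|f| (t * f).sin()));
    emb
}
```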
@@ -139,7 +139,7 @@ impl EmbedNd { } } -impl diffuse_rs_common::core::Module for EmbedNd { +impl diffusion_rs_common::core::Module for EmbedNd { fn forward(&self, ids: &Tensor) -> Result<Tensor> { let n_axes = ids.dim(D::Minus1)?; let mut emb = Vec::with_capacity(n_axes); @@ -165,9 +165,9 @@ pub struct MlpEmbedder { impl MlpEmbedder { fn new(in_sz: usize, h_sz: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> { let in_layer = - diffuse_rs_backend::linear(in_sz, h_sz, &cfg.quantization_config, vb.pp("linear_1"))?; + diffusion_rs_backend::linear(in_sz, h_sz, &cfg.quantization_config, vb.pp("linear_1"))?; let out_layer = - diffuse_rs_backend::linear(h_sz, h_sz, &cfg.quantization_config, vb.pp("linear_2"))?; + diffusion_rs_backend::linear(h_sz, h_sz, &cfg.quantization_config, vb.pp("linear_2"))?; Ok(Self { in_layer, out_layer, @@ -175,7 +175,7 @@ impl MlpEmbedder { } } -impl diffuse_rs_common::core::Module for MlpEmbedder { +impl diffusion_rs_common::core::Module for MlpEmbedder { fn forward(&self, xs: &Tensor) -> Result<Tensor> { self.out_layer .forward_autocast(&self.in_layer.forward_autocast(xs)?.silu()?) @@ -234,7 +234,7 @@ struct Modulation1 { impl Modulation1 { fn new(dim: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> { let lin = - diffuse_rs_backend::linear(dim, 3 * dim, &cfg.quantization_config, vb.pp("linear"))?; + diffusion_rs_backend::linear(dim, 3 * dim, &cfg.quantization_config, vb.pp("linear"))?; Ok(Self { lin, mod1: span!(tracing::Level::TRACE, "flux-mod1"), @@ -249,7 +249,7 @@ impl Modulation1 { .unsqueeze(1)? .chunk(3, D::Minus1)?; if ys.len() != 3 { - diffuse_rs_common::bail!("unexpected len from chunk {ys:?}") + diffusion_rs_common::bail!("unexpected len from chunk {ys:?}") } Ok(ModulationOut { shift: ys[0].clone(), @@ -268,7 +268,7 @@ struct Modulation2 { impl Modulation2 { fn new(dim: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> { let lin = - diffuse_rs_backend::linear(dim, 6 * dim, &cfg.quantization_config, vb.pp("linear"))?; + diffusion_rs_backend::linear(dim, 6 * dim, &cfg.quantization_config, vb.pp("linear"))?; Ok(Self { lin, mod2: span!(tracing::Level::TRACE, "flux-mod2"), @@ -283,7 +283,7 @@ impl Modulation2 { .unsqueeze(1)?
.chunk(6, D::Minus1)?; if ys.len() != 6 { - diffuse_rs_common::bail!("unexpected len from chunk {ys:?}") + diffusion_rs_common::bail!("unexpected len from chunk {ys:?}") } let mod1 = ModulationOut { shift: ys[0].clone(), @@ -322,21 +322,21 @@ impl SelfAttention { ) -> Result<Self> { let head_dim = dim / num_attention_heads; let (q, k, v, norm, proj) = if !context { - let q = diffuse_rs_backend::linear_b( + let q = diffusion_rs_backend::linear_b( dim, dim, qkv_bias, &cfg.quantization_config, vb.pp("to_q"), )?; - let k = diffuse_rs_backend::linear_b( + let k = diffusion_rs_backend::linear_b( dim, dim, qkv_bias, &cfg.quantization_config, vb.pp("to_k"), )?; - let v = diffuse_rs_backend::linear_b( + let v = diffusion_rs_backend::linear_b( dim, dim, qkv_bias, @@ -344,26 +344,30 @@ impl SelfAttention { vb.pp("to_v"), )?; let norm = QkNorm::new(head_dim, vb.pp("norm_q"), vb.pp("norm_k"))?; - let proj = - diffuse_rs_backend::linear(dim, dim, &cfg.quantization_config, vb.pp("to_out.0"))?; + let proj = diffusion_rs_backend::linear( + dim, + dim, + &cfg.quantization_config, + vb.pp("to_out.0"), + )?; (q, k, v, norm, proj) } else { - let q = diffuse_rs_backend::linear_b( + let q = diffusion_rs_backend::linear_b( dim, dim, qkv_bias, &cfg.quantization_config, vb.pp("add_q_proj"), )?; - let k = diffuse_rs_backend::linear_b( + let k = diffusion_rs_backend::linear_b( dim, dim, qkv_bias, &cfg.quantization_config, vb.pp("add_k_proj"), )?; - let v = diffuse_rs_backend::linear_b( + let v = diffusion_rs_backend::linear_b( dim, dim, qkv_bias, @@ -371,7 +375,7 @@ impl SelfAttention { vb.pp("add_v_proj"), )?; let norm = QkNorm::new(head_dim, vb.pp("norm_added_q"), vb.pp("norm_added_k"))?; - let proj = diffuse_rs_backend::linear( + let proj = diffusion_rs_backend::linear( dim, dim, &cfg.quantization_config, @@ -440,8 +444,9 @@ struct Mlp { impl Mlp { fn new(in_sz: usize, mlp_sz: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> { let lin1 = - diffuse_rs_backend::linear(in_sz, mlp_sz, &cfg.quantization_config, vb.pp("0.proj"))?; - let lin2 = diffuse_rs_backend::linear(mlp_sz, in_sz, &cfg.quantization_config, vb.pp("2"))?; + diffusion_rs_backend::linear(in_sz, mlp_sz, &cfg.quantization_config, vb.pp("0.proj"))?; + let lin2 = + diffusion_rs_backend::linear(mlp_sz, in_sz, &cfg.quantization_config, vb.pp("2"))?; Ok(Self { lin1, lin2, @@ -450,7 +455,7 @@ impl Mlp { } } -impl diffuse_rs_common::core::Module for Mlp { +impl diffusion_rs_common::core::Module for Mlp { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let _span = self.mlp.enter(); self.lin2 @@ -579,28 +584,28 @@ impl SingleStreamBlock { let mlp_sz = (h_sz as f64 * MLP_RATIO) as usize; let head_dim = h_sz / cfg.num_attention_heads; - let q = diffuse_rs_backend::linear_b( + let q = diffusion_rs_backend::linear_b( h_sz, h_sz, true, &cfg.quantization_config, vb.pp("attn.to_q"), )?; - let k = diffuse_rs_backend::linear_b( + let k = diffusion_rs_backend::linear_b( h_sz, h_sz, true, &cfg.quantization_config, vb.pp("attn.to_k"), )?; - let v = diffuse_rs_backend::linear_b( + let v = diffusion_rs_backend::linear_b( h_sz, h_sz, true, &cfg.quantization_config, vb.pp("attn.to_v"), )?; - let proj_mlp = diffuse_rs_backend::linear_b( + let proj_mlp = diffusion_rs_backend::linear_b( h_sz, mlp_sz, true, @@ -608,7 +613,7 @@ impl SingleStreamBlock { vb.pp("proj_mlp"), )?; - let linear2 = diffuse_rs_backend::linear( + let linear2 = diffusion_rs_backend::linear( h_sz + mlp_sz, h_sz, &cfg.quantization_config, @@ -667,13 +672,13 @@ pub struct LastLayer { impl LastLayer { fn new(h_sz: usize, p_sz: usize, out_c: usize, cfg: &Config, vb: VarBuilder) -> Result<Self> { let norm_final = layer_norm(h_sz, vb.pp("norm_final"))?; - let linear = diffuse_rs_backend::linear( + let linear = diffusion_rs_backend::linear( h_sz, p_sz * p_sz * out_c, &cfg.quantization_config, vb.pp("proj_out"), )?; - let ada_ln_modulation = diffuse_rs_backend::linear( + let ada_ln_modulation = diffusion_rs_backend::linear( h_sz, 2 * h_sz, &cfg.quantization_config, @@ -715,13 +720,13 @@ pub struct Flux { impl Flux { pub fn new(cfg: &Config, vb: VarBuilder) -> Result<Self> { - let img_in = diffuse_rs_backend::linear( + let img_in = diffusion_rs_backend::linear( cfg.in_channels, HIDDEN_SIZE, &cfg.quantization_config, vb.pp("x_embedder"), )?; - let txt_in = diffuse_rs_backend::linear( + let txt_in = diffusion_rs_backend::linear( cfg.joint_attention_dim, HIDDEN_SIZE, &cfg.quantization_config, @@ -793,10 +798,10 @@ impl Flux { guidance: Option<&Tensor>, ) -> Result<Tensor> { if txt.rank() != 3 { - diffuse_rs_common::bail!("unexpected shape for txt {:?}", txt.shape()) + diffusion_rs_common::bail!("unexpected shape for txt {:?}", txt.shape()) } if img.rank() != 3 { - diffuse_rs_common::bail!("unexpected shape for img {:?}", img.shape()) + diffusion_rs_common::bail!("unexpected shape for img {:?}", img.shape()) } let dtype = img.dtype(); let pe = { diff --git a/diffuse_rs_core/src/models/mod.rs b/diffusion_rs_core/src/models/mod.rs similarity index 93% rename from diffuse_rs_core/src/models/mod.rs rename to diffusion_rs_core/src/models/mod.rs index 7752367..d9e173a 100644 --- a/diffuse_rs_core/src/models/mod.rs +++ b/diffusion_rs_core/src/models/mod.rs @@ -6,8 +6,8 @@ mod vaes; use std::sync::Arc; pub use clip::{ClipTextConfig, ClipTextTransformer}; -use diffuse_rs_backend::QuantMethod; -use diffuse_rs_common::core::{Device, Result}; +use diffusion_rs_backend::QuantMethod; +use diffusion_rs_common::core::{Device, Result}; pub use flux::{FluxConfig, FluxModel}; pub use t5::{T5Config, T5EncoderModel}; diff --git a/diffuse_rs_core/src/models/t5/mod.rs b/diffusion_rs_core/src/models/t5/mod.rs similarity index 97% rename from diffuse_rs_core/src/models/t5/mod.rs rename to diffusion_rs_core/src/models/t5/mod.rs index cbd5602..09f1147 100644 --- a/diffuse_rs_core/src/models/t5/mod.rs +++ b/diffusion_rs_core/src/models/t5/mod.rs @@ -3,10 +3,10 @@ // T5 Text Model // https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py -use diffuse_rs_backend::{linear_no_bias, QuantMethod, QuantizedConfig}; -use diffuse_rs_common::core::{DType, Device, Module, Result, Tensor, D}; -use diffuse_rs_common::nn::{Activation, Embedding}; -use diffuse_rs_common::{embedding, VarBuilder}; +use diffusion_rs_backend::{linear_no_bias, QuantMethod, QuantizedConfig}; +use diffusion_rs_common::core::{DType, Device, Module, Result, Tensor, D}; +use diffusion_rs_common::nn::{Activation, Embedding}; +use diffusion_rs_common::{embedding, VarBuilder}; use serde::Deserialize; use std::sync::Arc; @@ -41,7 +41,7 @@ fn masked_fill(on_false: &Tensor, mask: &Tensor, on_true: f32) -> Result<Tensor> #[derive(Debug, Deserialize, Default, Clone, PartialEq)] pub struct ActivationWithOptionalGating { gated: bool, - activation: diffuse_rs_common::nn::Activation, + activation: diffusion_rs_common::nn::Activation, } fn deserialize_feed_forward_proj_activation<'de, D>( @@ -53,11 +53,11 @@ where match String::deserialize(deserializer)?.as_str() { "gated-gelu" => Ok(ActivationWithOptionalGating { gated: true, - activation: diffuse_rs_common::nn::Activation::NewGelu, +
activation: diffusion_rs_common::nn::Activation::NewGelu, }), "gated-silu" => Ok(ActivationWithOptionalGating { gated: true, - activation: diffuse_rs_common::nn::Activation::Silu, + activation: diffusion_rs_common::nn::Activation::Silu, }), buf => { let activation = serde_plain::from_str(buf).map_err(serde::de::Error::custom)?; @@ -387,7 +387,7 @@ impl T5Attention { }, }; - let attn_weights = { diffuse_rs_common::nn::ops::softmax_last_dim(&scores)? }; + let attn_weights = { diffusion_rs_common::nn::ops::softmax_last_dim(&scores)? }; let attn_output = attn_weights.matmul(&v)?; let attn_output = attn_output .transpose(1, 2)? diff --git a/diffuse_rs_core/src/models/vaes/autoencoder_kl.rs b/diffusion_rs_core/src/models/vaes/autoencoder_kl.rs similarity index 93% rename from diffuse_rs_core/src/models/vaes/autoencoder_kl.rs rename to diffusion_rs_core/src/models/vaes/autoencoder_kl.rs index 736bae0..01a0330 100644 --- a/diffuse_rs_core/src/models/vaes/autoencoder_kl.rs +++ b/diffusion_rs_core/src/models/vaes/autoencoder_kl.rs @@ -1,6 +1,6 @@ -use diffuse_rs_common::core::{Result, Tensor}; -use diffuse_rs_common::nn::{Activation, Conv2d, Conv2dConfig}; -use diffuse_rs_common::VarBuilder; +use diffusion_rs_common::core::{Result, Tensor}; +use diffusion_rs_common::nn::{Activation, Conv2d, Conv2dConfig}; +use diffusion_rs_common::VarBuilder; use serde::Deserialize; use super::{ @@ -65,7 +65,7 @@ impl AutoEncoderKl { let decoder = Decoder::new(&cfg.clone().into(), vb.pp("decoder"))?; let reg = DiagonalGaussian::new(true, 1)?; let quant_conv = if cfg.use_quant_conv { - Some(diffuse_rs_common::conv2d( + Some(diffusion_rs_common::conv2d( 2 * cfg.latent_channels, 2 * cfg.latent_channels, 1, @@ -76,7 +76,7 @@ impl AutoEncoderKl { None }; let post_quant_conv = if cfg.use_post_quant_conv { - Some(diffuse_rs_common::conv2d( + Some(diffusion_rs_common::conv2d( cfg.latent_channels, cfg.latent_channels, 1, diff --git a/diffuse_rs_core/src/models/vaes/mod.rs b/diffusion_rs_core/src/models/vaes/mod.rs similarity index 94% rename from diffuse_rs_core/src/models/vaes/mod.rs rename to diffusion_rs_core/src/models/vaes/mod.rs index df16ffc..1a8c3d0 100644 --- a/diffuse_rs_core/src/models/vaes/mod.rs +++ b/diffusion_rs_core/src/models/vaes/mod.rs @@ -1,13 +1,13 @@ use std::sync::Arc; use autoencoder_kl::{AutencoderKlConfig, AutoEncoderKl}; -use diffuse_rs_common::{ +use diffusion_rs_common::{ core::{Device, Result, Tensor}, ModelSource, }; use serde::Deserialize; -use diffuse_rs_common::{from_mmaped_safetensors, FileData, VarBuilder}; +use diffusion_rs_common::{from_mmaped_safetensors, FileData, VarBuilder}; mod autoencoder_kl; mod vae; diff --git a/diffuse_rs_core/src/models/vaes/vae.rs b/diffusion_rs_core/src/models/vaes/vae.rs similarity index 88% rename from diffuse_rs_core/src/models/vaes/vae.rs rename to diffusion_rs_core/src/models/vaes/vae.rs index 231a16e..84d1035 100644 --- a/diffuse_rs_core/src/models/vaes/vae.rs +++ b/diffusion_rs_core/src/models/vaes/vae.rs @@ -1,8 +1,8 @@ #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] -use diffuse_rs_common::core::{Result, Tensor, D}; -use diffuse_rs_common::nn::{Activation, Conv2d, Conv2dConfig, GroupNorm}; -use diffuse_rs_common::{conv2d, group_norm, linear, VarBuilder}; +use diffusion_rs_common::core::{Result, Tensor, D}; +use diffusion_rs_common::nn::{Activation, Conv2d, Conv2dConfig, GroupNorm}; +use diffusion_rs_common::{conv2d, group_norm, linear, VarBuilder}; use serde::Deserialize; use tracing::{span, Span}; @@ -29,7 +29,7 @@ fn 
scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Tensor> fn forward(&self, xs: &Tensor) -> Result<Tensor> { let _span = self.attn.enter(); let init_xs = xs; @@ -123,7 +123,7 @@ struct ResnetBlock { impl ResnetBlock { fn new(in_c: usize, out_c: usize, vb: VarBuilder, cfg: &VAEConfig) -> Result<Self> { - let conv_cfg = diffuse_rs_common::nn::Conv2dConfig { + let conv_cfg = diffusion_rs_common::nn::Conv2dConfig { padding: 1, ..Default::default() }; @@ -154,7 +154,7 @@ impl ResnetBlock { } } -impl diffuse_rs_common::core::Module for ResnetBlock { +impl diffusion_rs_common::core::Module for ResnetBlock { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let _span = self.resnet.enter(); let h = xs @@ -179,7 +179,7 @@ struct Downsample { impl Downsample { fn new(in_c: usize, vb: VarBuilder) -> Result<Self> { - let conv_cfg = diffuse_rs_common::nn::Conv2dConfig { + let conv_cfg = diffusion_rs_common::nn::Conv2dConfig { stride: 2, ..Default::default() }; @@ -191,7 +191,7 @@ impl Downsample { } } -impl diffuse_rs_common::core::Module for Downsample { +impl diffusion_rs_common::core::Module for Downsample { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let _span = self.downsample.enter(); let xs = xs.pad_with_zeros(D::Minus1, 0, 1)?; @@ -208,7 +208,7 @@ struct Upsample { impl Upsample { fn new(in_c: usize, vb: VarBuilder) -> Result<Self> { - let conv_cfg = diffuse_rs_common::nn::Conv2dConfig { + let conv_cfg = diffusion_rs_common::nn::Conv2dConfig { padding: 1, ..Default::default() }; @@ -220,7 +220,7 @@ impl Upsample { } } -impl diffuse_rs_common::core::Module for Upsample { +impl diffusion_rs_common::core::Module for Upsample { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let _ = self.upsample.enter(); let (_, _, h, w) = xs.dims4()?; @@ -253,9 +253,11 @@ impl Encoder { .iter() .all(|x| x == "DownEncoderBlock2D") { - diffuse_rs_common::bail!("All down (encoder) block types must be `DownEncoderBlock2D`"); + diffusion_rs_common::bail!( + "All down (encoder) block types must be `DownEncoderBlock2D`" + ); } - let conv_cfg = diffuse_rs_common::nn::Conv2dConfig { + let conv_cfg = diffusion_rs_common::nn::Conv2dConfig { padding: 1, ..Default::default() }; @@ -291,8 +293,8 @@ impl Encoder { // TODO: this is technically not general enough. Should always start with 1 resnet, then unet num_layers (defaults to 1 so this is OK) // repeats of attention and resnet!
- // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L644-L729 - // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L625 + // https://github.com/huggingface/diffusion_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusion_rs/models/unets/unet_2d_blocks.py#L644-L729 + // https://github.com/huggingface/diffusion_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusion_rs/models/unets/unet_2d_blocks.py#L625 let mid_block_1 = ResnetBlock::new(block_in, block_in, vb.pp("mid_block.resnets.0"), cfg)?; let mid_attn_1 = if cfg.mid_block_add_attention { Some(AttnBlock::new( @@ -325,7 +327,7 @@ impl Encoder { } } -impl diffuse_rs_common::nn::Module for Encoder { +impl diffusion_rs_common::nn::Module for Encoder { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let mut h = xs.apply(&self.conv_in)?; for block in self.down.iter() { @@ -368,9 +370,9 @@ pub struct Decoder { impl Decoder { pub fn new(cfg: &VAEConfig, vb: VarBuilder) -> Result<Self> { if !cfg.up_block_types.iter().all(|x| x == "UpDecoderBlock2D") { - diffuse_rs_common::bail!("All up (decoder) block types must be `UpDecoderBlock2D`"); + diffusion_rs_common::bail!("All up (decoder) block types must be `UpDecoderBlock2D`"); } - let conv_cfg = diffuse_rs_common::nn::Conv2dConfig { + let conv_cfg = diffusion_rs_common::nn::Conv2dConfig { padding: 1, ..Default::default() }; @@ -379,8 +381,8 @@ impl Decoder { // TODO: this is technically not general enough. Should always start with 1 resnet, then unet num_layers (defaults to 1 so this is OK) // repeats of attention and resnet! - // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L644-L729 - // https://github.com/huggingface/diffuse_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffuse_rs/models/unets/unet_2d_blocks.py#L625 + // https://github.com/huggingface/diffusion_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusion_rs/models/unets/unet_2d_blocks.py#L644-L729 + // https://github.com/huggingface/diffusion_rs/blob/243d9a49864ebb4562de6304a5fb9b9ebb496c6e/src/diffusion_rs/models/unets/unet_2d_blocks.py#L625 let mid_block_1 = ResnetBlock::new(block_in, block_in, vb.pp("mid_block.resnets.0"), cfg)?; let mid_attn_1 = if cfg.mid_block_add_attention { Some(AttnBlock::new( @@ -431,7 +433,7 @@ impl Decoder { } } -impl diffuse_rs_common::nn::Module for Decoder { +impl diffusion_rs_common::nn::Module for Decoder { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let h = xs.apply(&self.conv_in)?; let mut h = h.apply(&self.mid_block_1)?; @@ -465,7 +467,7 @@ impl DiagonalGaussian { } } -impl diffuse_rs_common::nn::Module for DiagonalGaussian { +impl diffusion_rs_common::nn::Module for DiagonalGaussian { fn forward(&self, xs: &Tensor) -> Result<Tensor> { let chunks = xs.chunk(2, self.chunk_dim)?; if self.sample { diff --git a/diffuse_rs_core/src/pipelines/flux/mod.rs b/diffusion_rs_core/src/pipelines/flux/mod.rs similarity index 93% rename from diffuse_rs_core/src/pipelines/flux/mod.rs rename to diffusion_rs_core/src/pipelines/flux/mod.rs index 48e0a9d..1fecdaf 100644 --- a/diffuse_rs_core/src/pipelines/flux/mod.rs +++ b/diffusion_rs_core/src/pipelines/flux/mod.rs @@ -2,8 +2,8 @@ use std::sync::Mutex; use std::{cmp::Ordering, collections::HashMap, sync::Arc}; use anyhow::Result; -use diffuse_rs_common::core::{DType, Device, Tensor, D}; -use diffuse_rs_common::nn::Module; +use diffusion_rs_common::core::{DType, Device, Tensor, D}; +use diffusion_rs_common::nn::Module; use tokenizers::Tokenizer; use tracing::info; @@ -15,7 +15,7 @@ use crate::{ }, pipelines::ComponentName, }; -use diffuse_rs_common::{from_mmaped_safetensors, ModelSource}; +use diffusion_rs_common::{from_mmaped_safetensors, ModelSource}; use super::sampling::Sampler; use super::scheduler::SchedulerConfig; @@ -74,7 +74,7 @@ impl Loader for FluxLoader { let vocab_file = &files["tokenizer/vocab.json"]; let merges_file = &files["tokenizer/merges.txt"]; - diffuse_rs_common::load_bpe_tokenizer(vocab_file, merges_file, &source)? + diffusion_rs_common::load_bpe_tokenizer(vocab_file, merges_file, &source)? } else { anyhow::bail!("incorrect storage of clip tokenizer") }; @@ -201,11 +201,11 @@ impl FluxPipeline { fn tokenize_and_pad( prompts: Vec<String>, tokenizer: &Tokenizer, - ) -> diffuse_rs_common::core::Result<Vec<Vec<u32>>> { + ) -> diffusion_rs_common::core::Result<Vec<Vec<u32>>> { let mut t5_tokens = Vec::new(); let unpadded_t5_tokens = tokenizer .encode_batch(prompts, true) - .map_err(|e| diffuse_rs_common::core::Error::Msg(e.to_string()))? + .map_err(|e| diffusion_rs_common::core::Error::Msg(e.to_string()))? .into_iter() .map(|e| e.get_ids().to_vec()) .collect::<Vec<_>>(); @@ -225,7 +225,7 @@ impl ModelPipeline for FluxPipeline { prompts: Vec<String>, params: DiffusionGenerationParams, offloading_type: Option<Offloading>, - ) -> diffuse_rs_common::core::Result<Tensor> { + ) -> diffusion_rs_common::core::Result<Tensor> { match offloading_type { Some(Offloading::Full) => { self.t5_model.to_device(&self.device)?; @@ -241,7 +241,7 @@ impl ModelPipeline for FluxPipeline { if !self.flux_model.is_guidance() { match t5_input_ids.dim(1)?.cmp(&256) { Ordering::Greater => { - diffuse_rs_common::bail!("T5 embedding length greater than 256, please shrink the prompt or use the -dev (with guidance distillation) version.") + diffusion_rs_common::bail!("T5 embedding length greater than 256, please shrink the prompt or use the -dev (with guidance distillation) version.") } Ordering::Less | Ordering::Equal => { t5_input_ids = @@ -300,7 +300,7 @@ impl ModelPipeline for FluxPipeline { } else { None }; - let step = |img: &Tensor, t_vec: &Tensor| -> diffuse_rs_common::core::Result<Tensor> { + let step = |img: &Tensor, t_vec: &Tensor| -> diffusion_rs_common::core::Result<Tensor> { self.flux_model.forward( img, &state.img_ids, diff --git a/diffuse_rs_core/src/pipelines/flux/sampling.rs b/diffusion_rs_core/src/pipelines/flux/sampling.rs similarity index 97% rename from diffuse_rs_core/src/pipelines/flux/sampling.rs rename to diffusion_rs_core/src/pipelines/flux/sampling.rs index 78f1bf1..7191d4b 100644 --- a/diffuse_rs_core/src/pipelines/flux/sampling.rs +++ b/diffusion_rs_core/src/pipelines/flux/sampling.rs @@ -1,6 +1,6 @@ #![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] -use diffuse_rs_common::core::{Device, Result, Tensor}; +use diffusion_rs_common::core::{Device, Result, Tensor}; pub fn get_noise( num_samples: usize, diff --git a/diffuse_rs_core/src/pipelines/mod.rs b/diffusion_rs_core/src/pipelines/mod.rs similarity index 97% rename from diffuse_rs_core/src/pipelines/mod.rs rename to diffusion_rs_core/src/pipelines/mod.rs index 56ad98b..ed5073b 100644 --- a/diffuse_rs_core/src/pipelines/mod.rs +++ b/diffusion_rs_core/src/pipelines/mod.rs @@ -9,12 +9,12 @@ use std::{ }; use anyhow::Result; -use diffuse_rs_common::core::{Device, Tensor}; +use diffusion_rs_common::core::{Device, Tensor}; use flux::FluxLoader; use image::{DynamicImage, RgbImage}; use serde::Deserialize; -use diffuse_rs_common::{FileData, FileLoader, ModelSource, NiceProgressBar, TokenSource}; +use diffusion_rs_common::{FileData, FileLoader, ModelSource, NiceProgressBar, TokenSource}; use tracing::info; /// Generation parameters. @@ -94,7 +94,7 @@ pub trait ModelPipeline: Send + Sync { prompts: Vec<String>, params: DiffusionGenerationParams, offloading_type: Option<Offloading>, - ) -> diffuse_rs_common::core::Result<Tensor>; + ) -> diffusion_rs_common::core::Result<Tensor>; } #[derive(Clone, Debug, Deserialize)] @@ -251,7 +251,7 @@ impl Pipeline { #[allow(clippy::cast_possible_truncation)] images.push(DynamicImage::ImageRgb8( RgbImage::from_raw(w as u32, h as u32, flattened.to_vec1::<u8>()?).ok_or( - diffuse_rs_common::core::Error::Msg( + diffusion_rs_common::core::Error::Msg( "RgbImage has invalid capacity.".to_string(), ), )?, diff --git a/diffuse_rs_core/src/pipelines/sampling.rs b/diffusion_rs_core/src/pipelines/sampling.rs similarity index 98% rename from diffuse_rs_core/src/pipelines/sampling.rs rename to diffusion_rs_core/src/pipelines/sampling.rs index 91328b1..24f5bc3 100644 --- a/diffuse_rs_core/src/pipelines/sampling.rs +++ b/diffusion_rs_core/src/pipelines/sampling.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::{ +use diffusion_rs_common::{ core::{Result, Tensor}, NiceProgressBar, }; diff --git a/diffuse_rs_core/src/pipelines/scheduler.rs b/diffusion_rs_core/src/pipelines/scheduler.rs similarity index 96% rename from diffuse_rs_core/src/pipelines/scheduler.rs rename to diffusion_rs_core/src/pipelines/scheduler.rs index 58fb00f..ce1e5f0 100644 --- a/diffuse_rs_core/src/pipelines/scheduler.rs +++ b/diffusion_rs_core/src/pipelines/scheduler.rs @@ -1,4 +1,4 @@ -use diffuse_rs_common::core::{Context, Result}; +use diffusion_rs_common::core::{Context, Result}; use serde::Deserialize; #[derive(Deserialize, Clone)] diff --git a/diffuse_rs_examples/Cargo.toml b/diffusion_rs_examples/Cargo.toml similarity index 74% rename from diffuse_rs_examples/Cargo.toml rename to diffusion_rs_examples/Cargo.toml index c9cb455..2455d60 100644 --- a/diffuse_rs_examples/Cargo.toml +++ b/diffusion_rs_examples/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "diffuse_rs_examples" +name = "diffusion_rs_examples" readme.workspace = true authors.workspace = true version.workspace = true edition.workspace = true -description = "Examples of diffuse_rs" +description = "Examples of diffusion_rs" repository.workspace = true keywords.workspace = true categories.workspace = true @@ -12,7 +12,7 @@ license.workspace = true homepage.workspace = true [dependencies] -diffuse_rs_core = { path = "../diffuse_rs_core" } +diffusion_rs_core = { path = "../diffusion_rs_core" } anyhow.workspace = true clap.workspace = true tracing.workspace = true diff --git a/diffuse_rs_examples/examples/dduf/README.md b/diffusion_rs_examples/examples/dduf/README.md similarity index 100% rename from diffuse_rs_examples/examples/dduf/README.md rename to diffusion_rs_examples/examples/dduf/README.md diff --git a/diffuse_rs_examples/examples/dduf/main.rs b/diffusion_rs_examples/examples/dduf/main.rs similarity index 92% rename from diffuse_rs_examples/examples/dduf/main.rs rename to diffusion_rs_examples/examples/dduf/main.rs index 3b26431..431c753 100644 --- a/diffuse_rs_examples/examples/dduf/main.rs +++ b/diffusion_rs_examples/examples/dduf/main.rs @@ -1,7 +1,9 @@ use std::time::Instant; use clap::Parser; -use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; +use diffusion_rs_core::{ + DiffusionGenerationParams, ModelSource,
Offloading, Pipeline, TokenSource, +}; use tracing::level_filters::LevelFilter; use tracing_subscriber::EnvFilter; diff --git a/diffuse_rs_examples/examples/flux/README.md b/diffusion_rs_examples/examples/flux/README.md similarity index 100% rename from diffuse_rs_examples/examples/flux/README.md rename to diffusion_rs_examples/examples/flux/README.md diff --git a/diffuse_rs_examples/examples/flux/main.rs b/diffusion_rs_examples/examples/flux/main.rs similarity index 94% rename from diffuse_rs_examples/examples/flux/main.rs rename to diffusion_rs_examples/examples/flux/main.rs index fd173d0..81f1665 100644 --- a/diffuse_rs_examples/examples/flux/main.rs +++ b/diffusion_rs_examples/examples/flux/main.rs @@ -1,6 +1,8 @@ use std::time::Instant; -use diffuse_rs_core::{DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource}; +use diffusion_rs_core::{ + DiffusionGenerationParams, ModelSource, Offloading, Pipeline, TokenSource, +}; use clap::{Parser, ValueEnum}; use tracing::level_filters::LevelFilter; diff --git a/diffuse_rs_py/Cargo.toml b/diffusion_rs_py/Cargo.toml similarity index 61% rename from diffuse_rs_py/Cargo.toml rename to diffusion_rs_py/Cargo.toml index 0404a84..4006933 100644 --- a/diffuse_rs_py/Cargo.toml +++ b/diffusion_rs_py/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "diffuse_rs_py" +name = "diffusion_rs_py" readme.workspace = true authors.workspace = true version.workspace = true @@ -12,13 +12,13 @@ license.workspace = true homepage.workspace = true [lib] -name = "diffuse_rs" +name = "diffusion_rs" crate-type = ["cdylib"] doc = false [dependencies] pyo3.workspace = true -diffuse_rs_core = { path = "../diffuse_rs_core" } +diffusion_rs_core = { path = "../diffusion_rs_core" } anyhow.workspace = true image.workspace = true @@ -26,8 +26,8 @@ image.workspace = true pyo3-build-config = "0.23" [features] -cuda = ["diffuse_rs_core/cuda"] -cudnn = ["diffuse_rs_core/cudnn"] -metal = ["diffuse_rs_core/metal"] -accelerate = ["diffuse_rs_core/accelerate"] -mkl = ["diffuse_rs_core/mkl"] +cuda = ["diffusion_rs_core/cuda"] +cudnn = ["diffusion_rs_core/cudnn"] +metal = ["diffusion_rs_core/metal"] +accelerate = ["diffusion_rs_core/accelerate"] +mkl = ["diffusion_rs_core/mkl"] diff --git a/diffuse_rs_py/build.rs b/diffusion_rs_py/build.rs similarity index 100% rename from diffuse_rs_py/build.rs rename to diffusion_rs_py/build.rs diff --git a/diffuse_rs_py/diffuse_rs.pyi b/diffusion_rs_py/diffuse_rs.pyi similarity index 100% rename from diffuse_rs_py/diffuse_rs.pyi rename to diffusion_rs_py/diffuse_rs.pyi diff --git a/diffuse_rs_py/examples/dduf.py b/diffusion_rs_py/examples/dduf.py similarity index 82% rename from diffuse_rs_py/examples/dduf.py rename to diffusion_rs_py/examples/dduf.py index 8033ef6..2f9113e 100644 --- a/diffuse_rs_py/examples/dduf.py +++ b/diffusion_rs_py/examples/dduf.py @@ -1,4 +1,4 @@ -from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline +from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline from PIL import Image import io diff --git a/diffuse_rs_py/examples/flux.py b/diffusion_rs_py/examples/flux.py similarity index 83% rename from diffuse_rs_py/examples/flux.py rename to diffusion_rs_py/examples/flux.py index 42f1b4a..ab27892 100644 --- a/diffuse_rs_py/examples/flux.py +++ b/diffusion_rs_py/examples/flux.py @@ -1,4 +1,4 @@ -from diffuse_rs import DiffusionGenerationParams, ModelSource, Pipeline +from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline from PIL import Image import io diff --git 
diff --git a/diffusion_rs_py/generate_wheels.sh b/diffusion_rs_py/generate_wheels.sh
new file mode 100644
index 0000000..56c21af
--- /dev/null
+++ b/diffusion_rs_py/generate_wheels.sh
@@ -0,0 +1,64 @@
+###################################
+### UPLOADING
+###################################
+
+# ⚠️⚠️⚠️⚠️ Be sure to update the `project.name` field in `pyproject.toml`!! ⚠️⚠️⚠️⚠️
+# diffusion_rs, diffusion_rs_cuda, diffusion_rs_metal, diffusion_rs_mkl, diffusion_rs_accelerate
+
+## testpypi:
+# twine upload --repository testpypi --password PASSWORD --username __token__ wheels-NAME/*.whl
+
+
+## pypi:
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cuda/*.whl
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-mkl/*.whl
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-accelerate/*.whl
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-metal/*.whl
+# ⚠️ Need both x86_64 and aarch64 builds before this! ⚠️
+# twine upload --repository pypi --password PASSWORD --username __token__ wheels-cpu/*.whl
+
+
+###################################
+#### MAC: Aarch64 Manylinux and OSX
+###################################
+
+docker build -t wheelmaker:latest -f Dockerfile.manylinux .
+docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.10
+docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.11
+docker run --rm -v .:/io wheelmaker build --release -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.12
+
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.10
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.11
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.12
+
+# Metal
+
+maturin build -o wheels-metal -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features metal
+maturin build -o wheels-metal -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features metal
+maturin build -o wheels-metal -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features metal
+
+# Accelerate
+
+maturin build -o wheels-accelerate -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features accelerate
+maturin build -o wheels-accelerate -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features accelerate
+maturin build -o wheels-accelerate -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features accelerate
+
+####################################
+# WINDOWS: x86_64 Manylinux, Windows
+####################################
+
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.10
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.11
+maturin build -o wheels-cpu -m diffusion_rs_py/Cargo.toml --interpreter python3.12
+
+# CUDA
+
+maturin build -o wheels-cuda -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features cuda
+maturin build -o wheels-cuda -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features cuda
+maturin build -o wheels-cuda -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features cuda
+
+# MKL
+
+maturin build -o wheels-mkl -m diffusion_rs_py/Cargo.toml --interpreter python3.10 --features mkl
+maturin build -o wheels-mkl -m diffusion_rs_py/Cargo.toml --interpreter python3.11 --features mkl
+maturin build -o wheels-mkl -m diffusion_rs_py/Cargo.toml --interpreter python3.12 --features mkl
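Every wheel variant in this script installs the same import name, `diffusion_rs`; only the PyPI distribution name differs, hence the `project.name` warning at the top of the script. A small standard-library helper, not part of this patch, to check which variant is present in an environment:

```python
# Report which diffusion_rs wheel variant (if any) is installed.
# Distribution names come from the comment near the top of generate_wheels.sh.
from importlib.metadata import PackageNotFoundError, version

VARIANTS = [
    "diffusion_rs",
    "diffusion_rs_cuda",
    "diffusion_rs_metal",
    "diffusion_rs_mkl",
    "diffusion_rs_accelerate",
]

for dist in VARIANTS:
    try:
        print(f"{dist}=={version(dist)}")
    except PackageNotFoundError:
        pass  # this variant is not installed
```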
diff --git a/diffuse_rs_py/pyproject.toml b/diffusion_rs_py/pyproject.toml
similarity index 92%
rename from diffuse_rs_py/pyproject.toml
rename to diffusion_rs_py/pyproject.toml
index 2e2c6ca..458884c 100644
--- a/diffuse_rs_py/pyproject.toml
+++ b/diffusion_rs_py/pyproject.toml
@@ -3,8 +3,8 @@ requires = ["maturin==1.7"]
 build-backend = "maturin"
 
 [project]
-name = "diffuse_rs"
-version = "0.1.3"
+name = "diffusion_rs"
+version = "0.1.0"
 requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Rust",
diff --git a/diffuse_rs_py/src/lib.rs b/diffusion_rs_py/src/lib.rs
similarity index 83%
rename from diffuse_rs_py/src/lib.rs
rename to diffusion_rs_py/src/lib.rs
index a885f44..2e9f039 100644
--- a/diffuse_rs_py/src/lib.rs
+++ b/diffusion_rs_py/src/lib.rs
@@ -66,7 +66,7 @@ impl DiffusionGenerationParams {
 }
 
 #[pyclass]
-pub struct Pipeline(diffuse_rs_core::Pipeline);
+pub struct Pipeline(diffusion_rs_core::Pipeline);
 
 #[pymethods]
 impl Pipeline {
@@ -86,21 +86,21 @@ impl Pipeline {
         offloading: Option<Offloading>,
     ) -> PyResult<Self> {
         let token = token
-            .map(diffuse_rs_core::TokenSource::Literal)
-            .unwrap_or(diffuse_rs_core::TokenSource::CacheToken);
+            .map(diffusion_rs_core::TokenSource::Literal)
+            .unwrap_or(diffusion_rs_core::TokenSource::CacheToken);
         let source = match source {
             ModelSource::DdufFile { file } => {
-                diffuse_rs_core::ModelSource::dduf(file).map_err(wrap_anyhow_error)?
+                diffusion_rs_core::ModelSource::dduf(file).map_err(wrap_anyhow_error)?
             }
             ModelSource::ModelId { model_id } => {
-                diffuse_rs_core::ModelSource::from_model_id(model_id)
+                diffusion_rs_core::ModelSource::from_model_id(model_id)
             }
         };
         let offloading = offloading.map(|offloading| match offloading {
-            Offloading::Full => diffuse_rs_core::Offloading::Full,
+            Offloading::Full => diffusion_rs_core::Offloading::Full,
         });
         Ok(Self(
-            diffuse_rs_core::Pipeline::load(source, silent, token, revision, offloading)
+            diffusion_rs_core::Pipeline::load(source, silent, token, revision, offloading)
                 .map_err(wrap_anyhow_error)?,
         ))
     }
@@ -114,7 +114,7 @@ impl Pipeline {
             .0
             .forward(
                 prompts,
-                diffuse_rs_core::DiffusionGenerationParams {
+                diffusion_rs_core::DiffusionGenerationParams {
                     height: params.height,
                     width: params.width,
                     num_steps: params.num_steps,
@@ -138,7 +138,7 @@ impl Pipeline {
 }
 
 #[pymodule]
-fn diffuse_rs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn diffusion_rs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<ModelSource>()?;
     m.add_class::<DiffusionGenerationParams>()?;
     m.add_class::<Pipeline>()?;
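Putting the renamed bindings together, here is a hedged end-to-end sketch against the new `diffusion_rs` module. The keyword names and `ModelSource` construction mirror `Pipeline::new`/`Pipeline::load` and the `ModelSource` match arms in the hunk above, but they are inferred rather than quoted from this patch; `token=None` falls back to `TokenSource::CacheToken` per the `unwrap_or` shown.

```python
# Usage sketch only: argument names, ModelSource construction, and the model id
# are assumptions inferred from diffusion_rs_py/src/lib.rs, not verbatim API docs.
from diffusion_rs import DiffusionGenerationParams, ModelSource, Pipeline

pipeline = Pipeline(
    source=ModelSource.ModelId(model_id="black-forest-labs/FLUX.1-dev"),
    silent=False,
    token=None,       # None -> TokenSource::CacheToken (cached HF token)
    revision=None,    # None -> default model revision
    offloading=None,  # or Offloading.Full, per the match arm above
)
images = pipeline.forward(
    prompts=["A photo of a mountain lake at dawn"],
    params=DiffusionGenerationParams(height=720, width=1280, num_steps=50),
)
```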