Skip to content

Commit

Permalink
Add tiling_1d_loop (#30)
Browse files Browse the repository at this point in the history
This is tiling_1d but with loops.
  • Loading branch information
LegNeato authored Nov 21, 2024
1 parent 3f91b13 commit 872163a
Show file tree
Hide file tree
Showing 15 changed files with 352 additions and 0 deletions.
16 changes: 16 additions & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ members = [
"crates/gpu/workgroup_256",
"crates/gpu/workgroup_2d",
"crates/gpu/tiling_1d",
"crates/gpu/tiling_1d_loop",
"crates/gpu/tiling_2d_simd",
#
# ---- The rust code that runs both on the GPU and the CPU. ----
Expand Down
17 changes: 17 additions & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/benches/gpu_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ fn bench_all_variants(c: &mut Criterion) {
let multiplier_workgroup_256 = matmul::workgroup_256::wgpu();
let multiplier_workgroup_2d = matmul::workgroup_2d::wgpu();
let multiplier_tiling_1d = matmul::tiling_1d::wgpu();
let multiplier_tiling_1d_loop = matmul::tiling_1d_loop::wgpu();
let multiplier_tiling_2d_simd = matmul::tiling_2d_simd::wgpu();
let multiplier_isomorphic_gpu = matmul::isomorphic::wgpu();

Expand Down Expand Up @@ -108,6 +109,22 @@ fn bench_all_variants(c: &mut Criterion) {
},
);

group.bench_with_input(
BenchmarkId::new("tiling_1d_loop:wgpu", format!("{}x{}x{}", m, k, n)),
&(m, k, n),
|bench, &(m, k, n)| {
bench.iter(|| {
black_box(multiplier_tiling_1d_loop.multiply(
black_box(&a),
black_box(&b),
m,
k,
n,
))
});
},
);

group.bench_with_input(
BenchmarkId::new("tiling_2d_simd:wgpu", format!("{}x{}x{}", m, k, n)),
&(m, k, n),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ fn main() {
run_tests(matmul::workgroup_256::wgpu(), &sizes);
run_tests(matmul::workgroup_2d::wgpu(), &sizes);
run_tests(matmul::tiling_1d::wgpu(), &sizes);
run_tests(matmul::tiling_1d_loop::wgpu(), &sizes);
run_tests(matmul::tiling_2d_simd::wgpu(), &sizes);

run_tests(matmul::isomorphic::wgpu(), &sizes);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "compiled_tiling_1d_loop"
version = "0.1.0"
edition = "2021"

[lib]
crate-type = ["lib", "cdylib"]

[build-dependencies]
spirv-builder = { git = "https://github.com/rust-gpu/rust-gpu", rev = "0da80f8a61867590a0824873fa45dc8983e49da8" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use spirv_builder::{MetadataPrintout, SpirvBuilder};
use std::env;
use std::fs;
use std::path::{Path, PathBuf};

/// Build-script entry point: compiles the `tiling_1d_loop` shader crate to SPIR-V and
/// writes `shader_binary.rs` into `OUT_DIR`, exposing the bytes as `SHADER_BINARY`.
///
/// # Errors
///
/// Returns an error if the shader build fails, the compiled module cannot be read,
/// `OUT_DIR` is not set, or the generated file cannot be written.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build the GPU crate for the `spirv-unknown-vulkan1.2` target, printing full metadata.
    let compiled = SpirvBuilder::new(
        Path::new("../../../gpu/tiling_1d_loop"),
        "spirv-unknown-vulkan1.2",
    )
    .print_metadata(MetadataPrintout::Full)
    .build()?;

    // Locate the single compiled shader module and load its raw bytes.
    let spirv_bytes = fs::read(compiled.module.unwrap_single())?;

    // Render every byte as `0xNN`, separated by ", ", ready to drop into an array literal.
    let mut byte_list = String::new();
    for (index, byte) in spirv_bytes.iter().enumerate() {
        if index != 0 {
            byte_list.push_str(", ");
        }
        byte_list.push_str(&format!("0x{:02X}", byte));
    }

    // Source text for the generated constant.
    let generated_code = format!(
        "/// Compiled SPIR-V shader binary\n\
         pub const SHADER_BINARY: &[u8] = &[{}];",
        byte_list
    );

    // The generated file lives in `OUT_DIR` under the name `shader_binary.rs`.
    let shader_binary_rs = PathBuf::from(env::var("OUT_DIR")?).join("shader_binary.rs");
    fs::write(&shader_binary_rs, generated_code)?;

    println!("Generated shader binary constant at {:?}", shader_binary_rs);
    Ok(())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Embed the raw bytes of the generated shader binary directly in our Rust code. This
// "bloats" the binary, but it also means you don't have to worry about the shader file
// being misplaced or deleted.
//
// The included file is read from `OUT_DIR` at compile time, so it must already have been
// generated there (presumably by this crate's build script) before this crate is compiled.
include!(concat!(env!("OUT_DIR"), "/shader_binary.rs"));
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ compiled_naive = { path = "../compiled_for_gpu/naive" }
compiled_workgroup_256 = { path = "../compiled_for_gpu/workgroup_256" }
compiled_workgroup_2d = { path = "../compiled_for_gpu/workgroup_2d" }
compiled_tiling_1d = { path = "../compiled_for_gpu/tiling_1d" }
compiled_tiling_1d_loop = { path = "../compiled_for_gpu/tiling_1d_loop" }
compiled_tiling_2d_simd = { path = "../compiled_for_gpu/tiling_2d_simd" }
compiled_isomorphic = { path = "../compiled_for_gpu/isomorphic" }
# The CPU side of the isomorphic implementation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ pub mod tiling_1d {
}
}

pub mod tiling_1d_loop {
    use super::*;
    use crate::backends::wgpu::MatrixMultiplier;

    /// Creates the wgpu-backed [`MatrixMultiplier`] for the `Tiling1dLoop` variant.
    ///
    /// Construction is asynchronous; this helper blocks the calling thread until the
    /// multiplier is ready and then returns it.
    pub fn wgpu() -> MatrixMultiplier<variants::Tiling1dLoop> {
        let setup = MatrixMultiplier::new(variants::Tiling1dLoop);
        futures::executor::block_on(setup)
    }
}

pub mod tiling_2d_simd {
use super::*;
use crate::backends::wgpu::MatrixMultiplier;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,36 @@ impl GridComputation for Tiling1d {
}
}

/// GPU implementation of matrix multiplication with one-dimensional tiling (using loops).
///
/// This is a unit struct that carries no data. The trait implementations on it supply the
/// variant's display name, its compiled shader bytes, and its workgroup/dispatch sizes.
pub struct Tiling1dLoop;

impl Display for Tiling1dLoop {
    /// Writes this variant's identifier, `tiling_1d_loop`, to the formatter.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("tiling_1d_loop")
    }
}

impl Gpu for Tiling1dLoop {
    /// Returns the shader bytes for this variant: the `SHADER_BINARY` constant exported
    /// by the `compiled_tiling_1d_loop` crate.
    fn compiled_shader(&self) -> &[u8] {
        compiled_tiling_1d_loop::SHADER_BINARY
    }
}

impl GridComputation for Tiling1dLoop {
    /// Workgroup dimensions used by this variant: 16 x 16 x 1.
    fn workgroup(&self) -> UVec3 {
        UVec3::new(16, 16, 1)
    }

    /// Computes how many workgroups to dispatch for the given `m` and `n`.
    ///
    /// `m` is divided by the workgroup's `x` size and `n` by its `y` size, each rounded
    /// up so that a partially filled trailing workgroup is still dispatched. The `z`
    /// count is always 1.
    fn dispatch_count(&self, m: u32, n: u32) -> UVec3 {
        let workgroup = self.workgroup();
        // `div_ceil` rounds up without forming the intermediate `m + size - 1` sum,
        // which overflows `u32` for inputs close to `u32::MAX`.
        UVec3::new(m.div_ceil(workgroup.x), n.div_ceil(workgroup.y), 1)
    }
}

/// GPU implementation of matrix multiplication with two-dimensional tiling.
pub struct Tiling2dSimd;

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "tiling_1d_loop"
version = "0.1.0"
edition = "2021"

[lib]
crate-type = ["dylib", "lib"]

[dependencies]
settings = { path = "../../shared/settings"}
spirv-std.workspace = true
Loading

0 comments on commit 872163a

Please sign in to comment.