Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tiling_1d_loop #30

Merged
merged 1 commit into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ members = [
"crates/gpu/workgroup_256",
"crates/gpu/workgroup_2d",
"crates/gpu/tiling_1d",
"crates/gpu/tiling_1d_loop",
"crates/gpu/tiling_2d_simd",
#
# ---- The rust code that runs both on the GPU and the CPU. ----
Expand Down
17 changes: 17 additions & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/benches/gpu_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ fn bench_all_variants(c: &mut Criterion) {
let multiplier_workgroup_256 = matmul::workgroup_256::wgpu();
let multiplier_workgroup_2d = matmul::workgroup_2d::wgpu();
let multiplier_tiling_1d = matmul::tiling_1d::wgpu();
let multiplier_tiling_1d_loop = matmul::tiling_1d_loop::wgpu();
let multiplier_tiling_2d_simd = matmul::tiling_2d_simd::wgpu();
let multiplier_isomorphic_gpu = matmul::isomorphic::wgpu();

Expand Down Expand Up @@ -108,6 +109,22 @@ fn bench_all_variants(c: &mut Criterion) {
},
);

group.bench_with_input(
BenchmarkId::new("tiling_1d_loop:wgpu", format!("{}x{}x{}", m, k, n)),
&(m, k, n),
|bench, &(m, k, n)| {
bench.iter(|| {
black_box(multiplier_tiling_1d_loop.multiply(
black_box(&a),
black_box(&b),
m,
k,
n,
))
});
},
);

group.bench_with_input(
BenchmarkId::new("tiling_2d_simd:wgpu", format!("{}x{}x{}", m, k, n)),
&(m, k, n),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ fn main() {
run_tests(matmul::workgroup_256::wgpu(), &sizes);
run_tests(matmul::workgroup_2d::wgpu(), &sizes);
run_tests(matmul::tiling_1d::wgpu(), &sizes);
run_tests(matmul::tiling_1d_loop::wgpu(), &sizes);
run_tests(matmul::tiling_2d_simd::wgpu(), &sizes);

run_tests(matmul::isomorphic::wgpu(), &sizes);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Manifest for the `compiled_tiling_1d_loop` package.
[package]
name = "compiled_tiling_1d_loop"
version = "0.1.0"
edition = "2021"

[lib]
# Emit both a regular Rust library (`lib`) and a C-compatible dynamic library (`cdylib`).
crate-type = ["lib", "cdylib"]

[build-dependencies]
# Only needed while building; pinned to an exact rust-gpu commit through `rev`.
spirv-builder = { git = "https://github.com/rust-gpu/rust-gpu", rev = "0da80f8a61867590a0824873fa45dc8983e49da8" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use spirv_builder::{MetadataPrintout, SpirvBuilder};
use std::env;
use std::fs;
use std::path::{Path, PathBuf};

/// Build-script entry point: compiles the `tiling_1d_loop` shader crate to SPIR-V and
/// writes a `shader_binary.rs` file into `OUT_DIR` that declares the compiled bytes
/// as the `SHADER_BINARY` constant.
///
/// # Errors
///
/// Returns an error if the shader build fails, the compiled module cannot be read,
/// `OUT_DIR` is not set, or the generated file cannot be written.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build the GPU crate for the `spirv-unknown-vulkan1.2` target.
    let shader_crate = Path::new("../../../gpu/tiling_1d_loop");
    let compile_result = SpirvBuilder::new(shader_crate, "spirv-unknown-vulkan1.2")
        .print_metadata(MetadataPrintout::Full)
        .build()?;

    // Locate the compiled shader module and load its bytes.
    let spirv_path = compile_result.module.unwrap_single();
    let spirv_bytes = fs::read(&spirv_path)?;

    // Render each byte as a two-digit uppercase hex literal, comma separated, so the
    // result can be dropped straight into a Rust array expression.
    let hex_literals: Vec<String> = spirv_bytes
        .iter()
        .map(|b| format!("0x{:02X}", b))
        .collect();
    let array_contents = hex_literals.join(", ");
    let source = format!(
        "/// Compiled SPIR-V shader binary\n\
         pub const SHADER_BINARY: &[u8] = &[{}];",
        array_contents
    );

    // Emit the generated source as `shader_binary.rs` inside `OUT_DIR`.
    let destination = PathBuf::from(env::var("OUT_DIR")?).join("shader_binary.rs");
    fs::write(&destination, source)?;

    println!("Generated shader binary constant at {:?}", destination);
    Ok(())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Include the raw bytes of the generated shader binary in our Rust code. This
// "bloats" the binary, but it also means you don't have to worry about the shader
// file being misplaced or deleted.
include!(concat!(env!("OUT_DIR"), "/shader_binary.rs"));
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ compiled_naive = { path = "../compiled_for_gpu/naive" }
compiled_workgroup_256 = { path = "../compiled_for_gpu/workgroup_256" }
compiled_workgroup_2d = { path = "../compiled_for_gpu/workgroup_2d" }
compiled_tiling_1d = { path = "../compiled_for_gpu/tiling_1d" }
compiled_tiling_1d_loop = { path = "../compiled_for_gpu/tiling_1d_loop" }
compiled_tiling_2d_simd = { path = "../compiled_for_gpu/tiling_2d_simd" }
compiled_isomorphic = { path = "../compiled_for_gpu/isomorphic" }
# The CPU side of the isomorphic implementation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ pub mod tiling_1d {
}
}

/// Entry points for the `tiling_1d_loop` matrix multiplication variant.
pub mod tiling_1d_loop {
    use super::*;
    use crate::backends::wgpu::MatrixMultiplier;

    /// Creates a wgpu-backed `MatrixMultiplier` for `variants::Tiling1dLoop`.
    ///
    /// The multiplier is constructed asynchronously; this function blocks the
    /// calling thread until construction has finished.
    pub fn wgpu() -> MatrixMultiplier<variants::Tiling1dLoop> {
        let setup = MatrixMultiplier::new(variants::Tiling1dLoop);
        futures::executor::block_on(setup)
    }
}

pub mod tiling_2d_simd {
use super::*;
use crate::backends::wgpu::MatrixMultiplier;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,36 @@ impl GridComputation for Tiling1d {
}
}

/// One-dimensional tiling matrix multiplication on the GPU, loop-based variant.
pub struct Tiling1dLoop;

impl Display for Tiling1dLoop {
    /// Writes the variant's fixed identifier, `tiling_1d_loop`.
    ///
    /// Width, fill and alignment flags on the formatter are not applied.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("tiling_1d_loop")
    }
}

impl Gpu for Tiling1dLoop {
fn compiled_shader(&self) -> &[u8] {
compiled_tiling_1d_loop::SHADER_BINARY
}
}

impl GridComputation for Tiling1dLoop {
    /// Returns the workgroup extent used by this variant: 16 x 16 x 1.
    fn workgroup(&self) -> UVec3 {
        UVec3::new(16, 16, 1)
    }

    /// Returns how many workgroups to dispatch along each axis for the given `m` and `n`.
    ///
    /// `m` is divided by the workgroup's `x` extent and `n` by its `y` extent, each
    /// rounded up so that a partially filled trailing workgroup is still dispatched.
    /// The `z` count is always 1. A dimension of zero yields a count of zero.
    fn dispatch_count(&self, m: u32, n: u32) -> UVec3 {
        let workgroup = self.workgroup();
        // `div_ceil` rounds up without forming the intermediate `dim + extent - 1`
        // sum, which overflows `u32` once `dim` is within `extent - 1` of `u32::MAX`.
        UVec3::new(m.div_ceil(workgroup.x), n.div_ceil(workgroup.y), 1)
    }
}

/// GPU implementation of matrix multiplication with two-dimensional tiling.
pub struct Tiling2dSimd;

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Manifest for the `tiling_1d_loop` package.
[package]
name = "tiling_1d_loop"
version = "0.1.0"
edition = "2021"

[lib]
# Emit both a Rust dynamic library (`dylib`) and a regular Rust library (`lib`).
crate-type = ["dylib", "lib"]

[dependencies]
# Local path dependency on the shared `settings` crate.
settings = { path = "../../shared/settings"}
# Dependency details are inherited from the workspace-level dependency table.
spirv-std.workspace = true
Loading