diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml
index 68149d2dc019f..67454c40d4b53 100644
--- a/.github/workflows/clang-format.yml
+++ b/.github/workflows/clang-format.yml
@@ -6,6 +6,7 @@ on:
   push:
     branches:
       - main
+      - develop
     paths:
       - '**/*.h'
       - '**/*.cpp'
@@ -15,6 +16,7 @@ on:
   pull_request:
     branches:
       - main
+      - develop
     paths:
       - '**/*.h'
       - '**/*.cpp'
diff --git a/csrc/activation_kernels.cu b/csrc/activation_kernels.cu
index 94a1c4a678cf7..8c3e74e502a55 100644
--- a/csrc/activation_kernels.cu
+++ b/csrc/activation_kernels.cu
@@ -93,20 +93,21 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {

 // Launch activation and gating kernel.
 #ifdef USE_ROCM
-#define LAUNCH_SCALED_ACTIVATION_GATE_KERNEL(KERNEL)                           \
-  int d = input.size(-1) / 2;                                                  \
-  int64_t num_tokens = input.numel() / input.size(-1);                         \
-  dim3 grid(num_tokens);                                                       \
-  dim3 block(std::min(d, 1024));                                               \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));            \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();                \
-  VLLM_DISPATCH_FLOATING_TYPES(                                                \
-      input.scalar_type(), "scaled_act_and_mul_kernel", [&] {                  \
-        vllm::scaled_act_and_mul_kernel<scalar_t, KERNEL<scalar_t>>            \
-            <<<grid, block, 0, stream>>>(out.data_ptr<c10::Float8_e4m3fnuz>(), \
-                                         input.data_ptr<scalar_t>(), d,        \
-                                         1.0 / (*scale.data_ptr<float>()));    \
-      });
+  #define LAUNCH_SCALED_ACTIVATION_GATE_KERNEL(KERNEL)                 \
+    int d = input.size(-1) / 2;                                        \
+    int64_t num_tokens = input.numel() / input.size(-1);               \
+    dim3 grid(num_tokens);                                             \
+    dim3 block(std::min(d, 1024));                                     \
+    const at::cuda::OptionalCUDAGuard device_guard(device_of(input));  \
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();      \
+    VLLM_DISPATCH_FLOATING_TYPES(                                      \
+        input.scalar_type(), "scaled_act_and_mul_kernel", [&] {        \
+          vllm::scaled_act_and_mul_kernel<scalar_t, KERNEL<scalar_t>>  \
+              <<<grid, block, 0, stream>>>(                            \
+                  out.data_ptr<c10::Float8_e4m3fnuz>(),                \
+                  input.data_ptr<scalar_t>(), d,                       \
+                  1.0 / (*scale.data_ptr<float>()));                   \
+        });
 #endif

 void silu_and_mul(torch::Tensor& out,  // [..., d]
diff --git a/csrc/layernorm_kernels.cu b/csrc/layernorm_kernels.cu
index de098a9ee0c19..405ba213628f6 100644
--- a/csrc/layernorm_kernels.cu
+++ b/csrc/layernorm_kernels.cu
@@ -247,7 +247,7 @@ void rms_norm(torch::Tensor& out,  // [..., hidden_size]
     LAUNCH_RMS_NORM(0);
   }
 #else
-  LAUNCH_RMS_NORM(0);
+  LAUNCH_RMS_NORM(0);
 #endif
 }