Skip to content

Commit

Permalink
set paged attn block size as an env parameter (#1109)
Browse files Browse the repository at this point in the history
* set default block size

Signed-off-by: jiqing-feng <[email protected]>

* decoding uses a single query

Signed-off-by: jiqing-feng <[email protected]>

* set block size as an env parameter

Signed-off-by: jiqing-feng <[email protected]>

* set different default value for block size based on device

Signed-off-by: jiqing-feng <[email protected]>

---------

Signed-off-by: jiqing-feng <[email protected]>
  • Loading branch information
jiqing-feng authored Jan 16, 2025
1 parent 726191f commit 7b4044d
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion optimum/exporters/ipex/cache_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from typing import List, Optional, Tuple

import torch
Expand Down Expand Up @@ -44,7 +45,8 @@ def __init__(
self.batch_size = batch_size
# Used in `generate` to keep tally of how many tokens the cache has seen
self._seen_tokens = torch.zeros([batch_size], dtype=torch.int32, device=device)
self.block_size = 64
default_block_size = 16 if device.type == "cpu" else 64
self.block_size = int(os.environ.get("OI_PAGED_ATTN_BLOCK_SIZE", str(default_block_size)))
self.num_blocks = (max_cache_len // self.block_size + (max_cache_len % self.block_size != 0)) * batch_size
self.block_tables = -1 * torch.ones([self.num_blocks], dtype=torch.int32, device=device).reshape(
batch_size, -1
Expand Down

0 comments on commit 7b4044d

Please sign in to comment.