Config fixes for VLLMModel #472

Open · wants to merge 1 commit into main
src/lighteval/models/vllm/vllm_model.py (11 changes: 7 additions & 4 deletions)
@@ -36,7 +36,7 @@
     GenerativeResponse,
     LoglikelihoodResponse,
 )
-from lighteval.models.utils import _get_dtype, _simplify_name
+from lighteval.models.utils import _get_dtype, _get_model_sha, _simplify_name
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
     LoglikelihoodRequest,
@@ -89,6 +89,9 @@ class VLLMModelConfig:
     subfolder: Optional[str] = None
     temperature: float = 0.6  # will be used for multi sampling tasks, for tasks requiring no sampling, this will be ignored and set to 0.
 
+    def get_model_sha(self):
+        return _get_model_sha(repo_id=self.pretrained, revision=self.revision)
+
 
 class VLLMModel(LightevalModel):
     def __init__(
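
For reference, `_get_model_sha` itself is not shown in this diff. A minimal sketch of what such a helper plausibly looks like, assuming it resolves the commit hash through `huggingface_hub` and falls back to an empty string on failure (both of these details are assumptions, not confirmed by this PR):

    # Hypothetical sketch of the _get_model_sha helper (not part of this diff).
    from huggingface_hub import HfApi

    def _get_model_sha(repo_id: str, revision: str) -> str:
        try:
            # model_info() returns repo metadata, including the resolved commit sha.
            info = HfApi().model_info(repo_id=repo_id, revision=revision)
            return info.sha or ""
        except Exception:
            # Assumed fallback: offline or private repos yield an empty sha.
            return ""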
@@ -113,10 +116,10 @@ def __init__(
         self.multichoice_continuations_start_space = config.multichoice_continuations_start_space
 
         self.model_name = _simplify_name(config.pretrained)
-        self.model_sha = ""  # config.get_model_sha()
+        self.model_sha = config.get_model_sha()
         self.precision = _get_dtype(config.dtype, config=self._config)
 
-        self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha)
+        self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha, model_dtype=config.dtype)
         self.pairwise_tokenization = config.pairwise_tokenization
 
     @property
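
The `ModelInfo` change above simply records the configured dtype alongside the model name and sha. A usage sketch, assuming `ModelInfo` is a plain dataclass whose fields match the keyword arguments in the diff (field defaults here are assumptions):

    # Sketch only: ModelInfo is assumed to be a simple dataclass matching the
    # keyword arguments used in the diff above.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class ModelInfo:
        model_name: str
        model_sha: Optional[str] = None
        model_dtype: Optional[str] = None

    info = ModelInfo(model_name="my-org/my-model", model_sha="abc123", model_dtype="bfloat16")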
@@ -191,7 +194,7 @@ def _create_auto_tokenizer(self, config: VLLMModelConfig, env_config: EnvConfig)
             config.pretrained,
             tokenizer_mode="auto",
             trust_remote_code=config.trust_remote_code,
-            tokenizer_revision=config.revision,
+            revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
         )
         tokenizer.pad_token = tokenizer.eos_token
         return tokenizer

Review comments on the `revision` change:

Member:

Thanks for the update re: revision. Not sure why you're doing the extra addition to the path, however?

anton-l (Member, Author), Jan 9, 2025:

I noticed this pattern in other implementations, so it probably makes sense to standardize while we're at it:

    tokenizer = AutoTokenizer.from_pretrained(
        model_name if tokenizer_name is None else tokenizer_name,
        revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
    )

Pinging @NathanHB for whether it's applicable to vllm.
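
To make the standardization proposal concrete, here is a small self-contained demonstration of the `revision`/`subfolder` concatenation used in the diff; `build_revision` is a hypothetical helper name and the values are illustrative only:

    # Demonstrates how the combined revision string is built.
    from typing import Optional

    def build_revision(revision: str, subfolder: Optional[str] = None) -> str:
        # "main" with subfolder "checkpoint-100" -> "main/checkpoint-100"
        return revision + (f"/{subfolder}" if subfolder is not None else "")

    assert build_revision("main") == "main"
    assert build_revision("main", "checkpoint-100") == "main/checkpoint-100"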