Skip to content

Commit

Permalink
[llm bench] remove outdated code from pt models loading (#1481)
Browse files Browse the repository at this point in the history
CVS-150917
Fix loading of GPTQ models and some file mismatches that occur when a model is exported
without trust_remote_code and then executed with it.
  • Loading branch information
eaidova authored Jan 6, 2025
1 parent db71b36 commit dd74ac1
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions tools/llm_bench/llm_bench_utils/pt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,14 @@ def create_text_gen_model(model_path, device, **kwargs):
model_class = PT_MODEL_CLASSES_MAPPING.get(model_type, PT_MODEL_CLASSES_MAPPING[default_model_type])
token_class = TOKENIZE_CLASSES_MAPPING.get(model_type, TOKENIZE_CLASSES_MAPPING[default_model_type])
start = time.perf_counter()
if model_type == 'chatglm':
model = model_class.from_pretrained(model_path, trust_remote_code=True).to('cpu', dtype=float)
else:
model = model_class.from_pretrained(model_path, trust_remote_code=True)
tokenizer = token_class.from_pretrained(model_path, trust_remote_code=True)
trust_remote_code = False
try:
model = model_class.from_pretrained(model_path, trust_remote_code=trust_remote_code)
except Exception:
start = time.perf_counter()
trust_remote_code = True
model = model_class.from_pretrained(model_path, trust_remote_code=trust_remote_code)
tokenizer = token_class.from_pretrained(model_path, trust_remote_code=trust_remote_code)
end = time.perf_counter()
from_pretrain_time = end - start
else:
Expand Down

0 comments on commit dd74ac1

Please sign in to comment.