
Commit

Merge remote-tracking branch 'origin/main' into fix-dsv2
lzhangzz committed Jan 14, 2025
2 parents 6f603bc + 8b7812b commit fc62c92
Showing 6 changed files with 27 additions and 24 deletions.
5 changes: 4 additions & 1 deletion lmdeploy/model.py
@@ -443,11 +443,12 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if all([c not in path for c in ['internlm2', '8k']]) and \
+        if all([c not in path for c in ['internlm3', 'internlm2', '8k']]) and \
                 all([c in path for c in ['internlm', 'chat']]):
             return 'internlm'


+@MODELS.register_module(name='internlm3')
 @MODELS.register_module(name='internlm2')
 class InternLM2Chat7B(InternLMChat7B):
     """Chat template and generation parameters of InternLM2-Chat-7B."""
@@ -490,6 +491,8 @@ def match(cls, model_path: str) -> Optional[str]:
         path = model_path.lower()
         if 'internlm2' in path and ('chat' in path or 'math' in path):
             return 'internlm2'
+        if 'internlm3' in path and ('instruct' in path):
+            return 'internlm3'

     def messages2prompt(self,
                         messages,
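
For reference, a minimal sketch of how the updated chat-template matching is meant to behave. The helper below simply mirrors the branches shown in the two hunks above; it is an illustration, not lmdeploy's actual API, and the example paths are only indicative.

from typing import Optional


def match_chat_template(model_path: str) -> Optional[str]:
    """Illustrative re-implementation of the `match` logic in the diff."""
    path = model_path.lower()
    if 'internlm3' in path and 'instruct' in path:
        return 'internlm3'
    if 'internlm2' in path and ('chat' in path or 'math' in path):
        return 'internlm2'
    if all(c not in path for c in ['internlm3', 'internlm2', '8k']) and \
            all(c in path for c in ['internlm', 'chat']):
        return 'internlm'
    return None


# e.g. '.../internlm3-8b-instruct' -> 'internlm3'
#      '.../internlm2-chat-7b'     -> 'internlm2'
#      '.../internlm-chat-7b'      -> 'internlm'
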
1 change: 0 additions & 1 deletion lmdeploy/serve/async_engine.py
@@ -574,7 +574,6 @@ def stream_infer(
                            **kwargs)

     async def _get_prompt_input(self,
-                                session_id: int,
                                 prompt: str,
                                 do_preprocess: bool,
                                 sequence_start: bool,
39 changes: 20 additions & 19 deletions lmdeploy/serve/utils.py
@@ -31,25 +31,25 @@ async def _async_get_logits(
         logits = [None] * len(input_ids)

         async def _proc(i):
-            async for out in self.generate(
-                    messages=None,
-                    input_ids=input_ids[i],
-                    step=0 if steps is None else steps[i],
-                    session_id=i,
-                    # `max_new_tokens=0` means we don't need engine to
-                    # generate tokens and `output_logits=all` requests engine
-                    # to output logits of all input tokens
-                    gen_config=GenerationConfig(max_new_tokens=0,
-                                                output_logits='all'),
-                    stream_response=False,
-                    sequence_start=sequence_start,
-                    sequence_end=sequence_end):
-                # In the last iteration, the yielded `out` is an empty response
-                # indicating the finish_reason, which should be ignored here
-                if out.finish_reason is None:
-                    # Try not to return in async for loop. Otherwise, there
-                    # will be `GeneratorExit` exception
-                    logits[i] = out.logits
+            async with self.model_inst(session_id=i) as inst:
+                input_len = len(input_ids[i])
+                # TODO(lvhan): Fix the ugly code later on
+                max_new_tokens = 1 if self.backend == 'turbomind' else 0
+                gen_config = GenerationConfig(max_new_tokens=max_new_tokens,
+                                              output_logits='all')
+                async with self.safe_run(inst,
+                                         session_id=i,
+                                         input_ids=input_ids[i],
+                                         gen_config=gen_config,
+                                         stream_output=False,
+                                         sequence_start=sequence_start,
+                                         sequence_end=sequence_end,
+                                         step=steps[i] if steps else 0) as gen:
+                    async for outputs in gen:
+                        pass
+                logits[i] = outputs.logits[:input_len, :]
+                if sequence_end and self.backend == 'pytorch':
+                    await inst.async_end(session_id=i)

         tasks = [_proc(i) for i in range(len(input_ids))]
         await asyncio.gather(*tasks)
@@ -211,4 +211,5 @@ def _get_ppl(self,
             loss = flat_loss_matrix.sum()
             target_count = target_mask.sum()
             result.append(loss.item() / target_count.item())
+        logger.info(f'ppl result: {result}')
         return result
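
The trailing hunk only adds a log statement, but for context: the value appended per prompt in `_get_ppl` is the token-averaged cross-entropy over that prompt, computed from the per-token logits gathered by `_async_get_logits` above. A standalone sketch under that assumption (not lmdeploy's actual implementation; logits are assumed to have shape [seq_len, vocab_size]):

import torch
import torch.nn.functional as F


def avg_token_loss(logits: torch.Tensor, token_ids: torch.Tensor) -> float:
    """Token-averaged cross-entropy of a prompt under its own logits.

    `token_ids` is a 1-D LongTensor of the prompt's token ids.
    """
    targets = token_ids[1:]               # each position predicts the next token
    preds = logits[:-1].float()           # last position has no target, drop it
    loss = F.cross_entropy(preds, targets, reduction='sum')
    return loss.item() / targets.numel()  # mirrors `loss / target_count` above


# Perplexity, if desired, is simply exp() of this value.
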
2 changes: 1 addition & 1 deletion lmdeploy/turbomind/deploy/source_model/llama.py
@@ -191,7 +191,7 @@ def model_info(self):

         return dict(
             size_per_head=head_dim,
-            rotary_embedding=hidden_units // attn_head_num,
+            rotary_embedding=head_dim,
             num_layer=num_layer,
             norm_eps=norm_eps,
             head_num=attn_head_num,
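
The one-line llama.py change matters for checkpoints whose config declares `head_dim` explicitly rather than implying it as hidden_size / num_attention_heads; when the two disagree, the rotary dimension must follow the declared `head_dim`. A hypothetical illustration (the numbers below are made up for the example, not taken from any particular model):

# Hypothetical config values, for illustration only.
hidden_units = 2048
attn_head_num = 32
head_dim = 128                           # declared explicitly in the config

derived = hidden_units // attn_head_num  # 64: the old rotary_embedding value
assert derived != head_dim               # the two disagree for such models
rotary_embedding = head_dim              # 128: the value used after this fix
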
2 changes: 2 additions & 0 deletions lmdeploy/turbomind/supported_models.py
@@ -13,6 +13,8 @@
     InternLMForCausalLM='llama',
     # internlm2
     InternLM2ForCausalLM='internlm2',
+    # internlm3
+    InternLM3ForCausalLM='llama',
     # llama, llama2, alpaca, vicuna, codellama, ultracm, yi,
     # deepseek-coder, deepseek-llm
     LlamaForCausalLM='llama',
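
The supported_models.py hunk extends the architecture-to-source-model mapping; the registry key is the `architectures` entry from the checkpoint's config.json. A minimal sketch of the lookup, assuming a simplified copy of the mapping from the hunk above (illustration only, not lmdeploy's code):

import json
import os


# Simplified copy of the mapping shown above (illustration only).
SUPPORTED_ARCHS = dict(
    InternLMForCausalLM='llama',
    InternLM2ForCausalLM='internlm2',
    InternLM3ForCausalLM='llama',
    LlamaForCausalLM='llama',
)


def turbomind_source_model(model_dir: str) -> str:
    """Map a checkpoint's declared architecture to a turbomind source model."""
    with open(os.path.join(model_dir, 'config.json')) as f:
        arch = json.load(f)['architectures'][0]
    return SUPPORTED_ARCHS[arch]  # e.g. 'InternLM3ForCausalLM' -> 'llama'
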
2 changes: 0 additions & 2 deletions tests/test_lmdeploy/test_auto_backend.py
@@ -33,8 +33,6 @@ def models(self):
             ('tiiuae/falcon-7b-instruct', True, False),
             ('01-ai/Yi-34B-Chat', True, True),
             ('codellama/CodeLlama-7b-Instruct-hf', True, True),
-            ('mistralai/Mistral-7B-Instruct-v0.1', True, True),
-            ('mistralai/Mixtral-8x7B-Instruct-v0.1', True, True),
             ('Qwen/Qwen-7B-Chat', True, True),
             ('Qwen/Qwen-VL-Chat', False, True),
             ('Qwen/Qwen1.5-4B-Chat', True, True),
