Fix max_new_tokens for HPU predictor (#226)
* fix max_new_tokens

* Update llm_on_ray/inference/predictors/hpu_predictor.py

Co-authored-by: Zhi Lin <[email protected]>
Signed-off-by: Carson Wang <[email protected]>

* update

---------

Signed-off-by: Carson Wang <[email protected]>
Co-authored-by: Zhi Lin <[email protected]>
carsonwang and kira-lin authored May 20, 2024
1 parent 620800f commit 30b3204
Showing 1 changed file with 1 addition and 1 deletion.
llm_on_ray/inference/predictors/hpu_predictor.py (1 addition, 1 deletion)

@@ -176,7 +176,7 @@ def _process_config(self, config):
config["lazy_mode"] = self.use_lazy_mode
config["hpu_graphs"] = self.use_hpu_graphs
# max_new_tokens is required for hpu
if "max_new_tokens" not in config:
if config.get("max_new_tokens", None) is None:
config["max_new_tokens"] = 128

def get_streamer(self):
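For context, the old check `"max_new_tokens" not in config` skipped the default whenever the key was present but explicitly set to None, so the required value was never filled in for HPU. Below is a minimal runnable sketch of the fixed logic; `apply_max_new_tokens_default` is a hypothetical standalone stand-in for the dict handling inside `_process_config`:

def apply_max_new_tokens_default(config: dict) -> dict:
    # Old check: `"max_new_tokens" not in config` -- misses a key that is
    # present but explicitly set to None.
    # Fixed check: treat a missing key and an explicit None the same way.
    if config.get("max_new_tokens", None) is None:
        config["max_new_tokens"] = 128  # max_new_tokens is required for HPU
    return config

# The case the old check missed: key present, value None.
print(apply_max_new_tokens_default({"max_new_tokens": None}))
# {'max_new_tokens': 128}

# A genuinely missing key behaves the same under both checks.
print(apply_max_new_tokens_default({"temperature": 0.7}))
# {'temperature': 0.7, 'max_new_tokens': 128}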
