diff --git a/optimum/exporters/ipex/cache_utils.py b/optimum/exporters/ipex/cache_utils.py
index 24344bc9e5..c8868716a4 100644
--- a/optimum/exporters/ipex/cache_utils.py
+++ b/optimum/exporters/ipex/cache_utils.py
@@ -41,6 +41,7 @@ def __init__(
     ) -> None:
         super().__init__()
         self.max_batch_size = max_batch_size
+        self.batch_size = max_batch_size
         self.kv_cache = []
 
         self._seen_tokens = max_batch_size * [
diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py
index b19e45af1e..270a9b32da 100644
--- a/optimum/intel/ipex/modeling_base.py
+++ b/optimum/intel/ipex/modeling_base.py
@@ -142,6 +142,8 @@ def __init__(
 
         self.input_names = set(inspect.signature(model.forward).parameters)
 
+        if self._is_ipex_exported:
+            model = _patch_model(model)
         # Registers the IPEXModelForXXX classes into the transformers AutoModel classes to avoid warnings when creating
         # a pipeline https://github.com/huggingface/transformers/blob/cad61b68396a1a387287a8e2e2fef78a25b79383/src/transformers/pipelines/base.py#L863
         AutoConfig.register(self.base_model_prefix, AutoConfig)
@@ -238,11 +240,6 @@ def _from_pretrained(
             _commit_hash=commit_hash,
             **model_kwargs,
         )
-        if is_torch_xpu_available(check_device=True):
-            model.to("xpu:0")
-
-        if _is_patched_with_ipex(model, task):
-            model = _patch_model(model)
         return cls(model, config=config, export=True, **kwargs)
 
     def _save_pretrained(self, save_directory: Union[str, Path]):