fix: bundle CUDA DLL into the release #62

Open · wants to merge 28 commits into main
52a48da
fix: bundle CUDA DLL into the release
louisgv Jul 2, 2023
43330d4
Merge branch 'main' into 61-bug-cuda-dlls
louisgv Jul 3, 2023
4d94a47
Merge branch 'main' into 61-bug-cuda-dlls
louisgv Jul 14, 2023
953af83
Merge branch 'main' into 61-bug-cuda-dlls
LLukas22 Jul 18, 2023
4b71716
Update `rustformers` + check gpu
LLukas22 Jul 18, 2023
4b8fe59
Set `n_batch` correctly
LLukas22 Jul 18, 2023
187b135
Copy cuda libraries
LLukas22 Jul 20, 2023
9343897
reduce feeding delay if gpu is enabled
LLukas22 Jul 21, 2023
a2a3dbf
Copy `opencl` dlls
LLukas22 Jul 21, 2023
a8b3bbf
create linux ci
LLukas22 Jul 21, 2023
21ae9e1
defaults for release infos
LLukas22 Jul 21, 2023
286574d
Fail if files aren't found
LLukas22 Jul 21, 2023
86cc051
Add windows build
LLukas22 Jul 21, 2023
47f9dfc
Macos build
LLukas22 Jul 21, 2023
7c1f25a
ci bugfixes
LLukas22 Jul 22, 2023
36e050b
More bugfixes and absolute paths
LLukas22 Jul 22, 2023
0b26205
Paths .... again
LLukas22 Jul 22, 2023
cc786f0
Make mac artifacts unique
LLukas22 Jul 22, 2023
89eb1fa
renable build for windows-cublas
LLukas22 Jul 22, 2023
0761d79
update character
louisgv Jul 30, 2023
7481edf
Slight refactor
louisgv Aug 1, 2023
9d23cfd
update character
louisgv Aug 2, 2023
5b51725
update llm
louisgv Aug 2, 2023
006cd5a
Merge branch 'main' into 61-bug-cuda-dlls
louisgv Sep 16, 2023
9b8d16d
fix build script
louisgv Sep 16, 2023
bc5edf6
use self-hosted runner for metal
louisgv Sep 16, 2023
18f04ed
remove build on push (consume too much compute atm)
louisgv Sep 16, 2023
1211cc2
Add todo
louisgv Sep 16, 2023
Set n_batch correctly
LLukas22 committed Jul 18, 2023
commit 4b8fe59aaaae16de201df121c4995bfc969e6a04
21 changes: 17 additions & 4 deletions apps/desktop/src-tauri/src/inference/process.rs
@@ -65,9 +65,6 @@ impl InferenceThreadRequest {
 fn get_inference_params(
   completion_request: &CompletionRequest,
 ) -> InferenceParameters {
-  let n_threads = model::pool::get_n_threads();
-  let n_batch = if get_use_gpu() { 240 } else { n_threads };
-
   InferenceParameters {
     sampler: Arc::new(completion_request.to_top_p_top_k()),
   }
@@ -92,7 +89,23 @@ pub fn start(req: InferenceThreadRequest) -> JoinHandle<()> {
   }
 };
 
-  let mut session = model.start_session(Default::default());
+  let n_threads = model::pool::get_n_threads();
+
+  // set the batch size according to the accelerator
+  let backend = llm::ggml_get_accelerator();
+  let n_batch = match backend {
+    // 1 is the only supported batch size for Metal
+    llm::GgmlAccelerator::Metal => if get_use_gpu() { 1 } else { n_threads },
+    llm::GgmlAccelerator::None => n_threads,
+    _ => if get_use_gpu() { 512 } else { n_threads },
+  };
+
+  let session_config = llm::InferenceSessionConfig {
+    n_batch: n_batch,
+    n_threads: n_threads,
+    ..Default::default()
+  };
+
+  let mut session = model.start_session(session_config);
 
 let mut output_request = OutputRequest::default();
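The batch-size selection in this commit can be isolated for clarity. The sketch below is a minimal, self-contained rendering of that `match`, using a stand-in `Accelerator` enum; the real code dispatches on `llm::GgmlAccelerator` from the rustformers `llm` crate, and the helper name `pick_n_batch` is hypothetical:

```rust
// Stand-in for llm::GgmlAccelerator (hypothetical enum for illustration).
#[derive(Debug, Clone, Copy, PartialEq)]
enum Accelerator {
    Metal,
    Cuda,
    None,
}

/// Pick an inference batch size for the detected backend.
/// Metal only supports a batch size of 1 when the GPU is used;
/// CPU-only runs fall back to one batch entry per thread.
fn pick_n_batch(backend: Accelerator, use_gpu: bool, n_batch_threads: usize) -> usize {
    match backend {
        Accelerator::Metal => if use_gpu { 1 } else { n_batch_threads },
        Accelerator::None => n_batch_threads,
        // CUDA, OpenCL, etc. can take large batches when the GPU is on.
        _ => if use_gpu { 512 } else { n_batch_threads },
    }
}

fn main() {
    assert_eq!(pick_n_batch(Accelerator::Metal, true, 8), 1);
    assert_eq!(pick_n_batch(Accelerator::Cuda, true, 8), 512);
    assert_eq!(pick_n_batch(Accelerator::None, true, 8), 8);
    assert_eq!(pick_n_batch(Accelerator::Cuda, false, 8), 8);
}
```

Note the design choice the commit makes: rather than hard-coding `n_batch = 240` inside `get_inference_params`, the value is computed next to session creation and passed through `InferenceSessionConfig`, so each accelerator's constraint lives in one place.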