From 795b95bdcb22d788c1a062b47d1fe871d768c806 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 8 Nov 2023 15:20:06 +0100 Subject: [PATCH] `ultralytics 8.0.208` automatic thread-safe inference (#6185) Signed-off-by: Glenn Jocher Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com> Co-authored-by: Muhammad Rizwan Munawar Co-authored-by: PIW <56834479+parkilwoo@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/guides/index.md | 2 + docs/guides/yolo-thread-safe-inference.md | 108 +++++++++++++++++++ docs/hub/app/android.md | 23 +++++ docs/hub/app/index.md | 34 +++--- docs/hub/app/ios.md | 23 +++++ docs/hub/index.md | 17 ++- docs/index.md | 15 +++ docs/modes/predict.md | 27 +++++ docs/quickstart.md | 11 -- docs/usage/cli.md | 11 ++ mkdocs.yml | 4 +- ultralytics/__init__.py | 2 +- ultralytics/data/utils.py | 2 +- ultralytics/engine/predictor.py | 120 +++++++++++----------- 14 files changed, 306 insertions(+), 93 deletions(-) create mode 100644 docs/guides/yolo-thread-safe-inference.md diff --git a/docs/guides/index.md b/docs/guides/index.md index 1bf8eb35fa4..a59762e3dff 100644 --- a/docs/guides/index.md +++ b/docs/guides/index.md @@ -16,6 +16,7 @@ Here's a compilation of in-depth guides to help you master different aspects of * [YOLO Common Issues](yolo-common-issues.md) ⭐ RECOMMENDED: Practical solutions and troubleshooting tips to the most frequently encountered issues when working with Ultralytics YOLO models. * [YOLO Performance Metrics](yolo-performance-metrics.md) ⭐ ESSENTIAL: Understand the key metrics like mAP, IoU, and F1 score used to evaluate the performance of your YOLO models. Includes practical examples and tips on how to improve detection accuracy and speed. +* [Model Deployment Options](model-deployment-options.md): Overview of YOLO model deployment formats like ONNX, OpenVINO, and TensorRT, with pros and cons for each to inform your deployment strategy. 
* [K-Fold Cross Validation](kfold-cross-validation.md) 🚀 NEW: Learn how to improve model generalization using K-Fold cross-validation technique. * [Hyperparameter Tuning](hyperparameter-tuning.md) 🚀 NEW: Discover how to optimize your YOLO models by fine-tuning hyperparameters using the Tuner class and genetic evolution algorithms. * [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLOv8 for object detection in high-resolution images. @@ -24,6 +25,7 @@ Here's a compilation of in-depth guides to help you master different aspects of * [Docker Quickstart](docker-quickstart.md) 🚀 NEW: Complete guide to setting up and using Ultralytics YOLO models with [Docker](https://hub.docker.com/r/ultralytics/ultralytics). Learn how to install Docker, manage GPU support, and run YOLO models in isolated containers for consistent development and deployment. * [Raspberry Pi](raspberry-pi.md) 🚀 NEW: Quickstart tutorial to run YOLO models to the latest Raspberry Pi hardware. * [Triton Inference Server Integration](triton-inference-server.md) 🚀 NEW: Dive into the integration of Ultralytics YOLOv8 with NVIDIA's Triton Inference Server for scalable and efficient deep learning inference deployments. +* [YOLO Thread-Safe Inference](yolo-thread-safe-inference.md) 🚀 NEW: Guidelines for performing inference with YOLO models in a thread-safe manner. Learn the importance of thread safety and best practices to prevent race conditions and ensure consistent predictions. 
## Contribute to Our Guides diff --git a/docs/guides/yolo-thread-safe-inference.md b/docs/guides/yolo-thread-safe-inference.md new file mode 100644 index 00000000000..abf7a36198f --- /dev/null +++ b/docs/guides/yolo-thread-safe-inference.md @@ -0,0 +1,108 @@ +--- +comments: true +description: This guide provides best practices for performing thread-safe inference with YOLO models, ensuring reliable and concurrent predictions in multi-threaded applications. +keywords: thread-safe, YOLO inference, multi-threading, concurrent predictions, YOLO models, Ultralytics, Python threading, safe YOLO usage, AI concurrency +--- + +# Thread-Safe Inference with YOLO Models + +Running YOLO models in a multi-threaded environment requires careful consideration to ensure thread safety. Python's `threading` module allows you to run several threads concurrently, but when it comes to using YOLO models across these threads, there are important safety issues to be aware of. This page will guide you through creating thread-safe YOLO model inference. + +## Understanding Python Threading + +Python threads are a form of concurrency that allows your program to have multiple operations in progress at once. However, Python's Global Interpreter Lock (GIL) means that only one thread can execute Python bytecode at a time. + +

+ Single vs Multi-Thread Examples +

+ +While this sounds like a limitation, threads can still provide concurrency, especially for I/O-bound operations or when using operations that release the GIL, like those performed by YOLO's underlying C libraries. + +## The Danger of Shared Model Instances + +Instantiating a YOLO model outside your threads and sharing this instance across multiple threads can lead to race conditions, where the internal state of the model is inconsistently modified due to concurrent accesses. This is particularly problematic when the model or its components hold state that is not designed to be thread-safe. + +### Non-Thread-Safe Example: Single Model Instance + +When using threads in Python, it's important to recognize patterns that can lead to concurrency issues. Here is what you should avoid: sharing a single YOLO model instance across multiple threads. + +```python +# Unsafe: Sharing a single model instance across threads +from ultralytics import YOLO +from threading import Thread + +# Instantiate the model outside the thread +shared_model = YOLO("yolov8n.pt") + + +def predict(image_path): + results = shared_model.predict(image_path) + # Process results + + +# Starting threads that share the same model instance +Thread(target=predict, args=("image1.jpg",)).start() +Thread(target=predict, args=("image2.jpg",)).start() +``` + +In the example above, the `shared_model` is used by multiple threads, which can lead to unpredictable results because `predict` could be executed simultaneously by multiple threads. 
+ +### Non-Thread-Safe Example: Multiple Model Instances + +Similarly, here is an unsafe pattern with multiple YOLO model instances: + +```python +# Unsafe: Sharing multiple model instances across threads can still lead to issues +from ultralytics import YOLO +from threading import Thread + +# Instantiate multiple models outside the thread +shared_model_1 = YOLO("yolov8n_1.pt") +shared_model_2 = YOLO("yolov8n_2.pt") + + +def predict(model, image_path): + results = model.predict(image_path) + # Process results + + +# Starting threads with individual model instances +Thread(target=predict, args=(shared_model_1, "image1.jpg")).start() +Thread(target=predict, args=(shared_model_2, "image2.jpg")).start() +``` + +Even though there are two separate model instances, the risk of concurrency issues still exists. If the internal implementation of `YOLO` is not thread-safe, using separate instances might not prevent race conditions, especially if these instances share any underlying resources or states that are not thread-local. + +## Thread-Safe Inference + +To perform thread-safe inference, you should instantiate a separate YOLO model within each thread. This ensures that each thread has its own isolated model instance, eliminating the risk of race conditions. + +### Thread-Safe Example + +Here's how to instantiate a YOLO model inside each thread for safe parallel inference: + +```python +# Safe: Instantiating a single model inside each thread +from ultralytics import YOLO +from threading import Thread + + +def thread_safe_predict(image_path): + # Instantiate a new model inside the thread + local_model = YOLO("yolov8n.pt") + results = local_model.predict(image_path) + # Process results + + +# Starting threads that each have their own model instance +Thread(target=thread_safe_predict, args=("image1.jpg",)).start() +Thread(target=thread_safe_predict, args=("image2.jpg",)).start() +``` + +In this example, each thread creates its own `YOLO` instance. 
This prevents any thread from interfering with the model state of another, thus ensuring that each thread performs inference safely and without unexpected interactions with the other threads. + +## Conclusion + +When using YOLO models with Python's `threading`, always instantiate your models within the thread that will use them to ensure thread safety. This practice avoids race conditions and makes sure that your inference tasks run reliably. + +For more advanced scenarios and to further optimize your multi-threaded inference performance, consider using process-based parallelism with `multiprocessing` or leveraging a task queue with dedicated worker processes. diff --git a/docs/hub/app/android.md b/docs/hub/app/android.md index e1f4f75fb10..e6acfaf5beb 100644 --- a/docs/hub/app/android.md +++ b/docs/hub/app/android.md @@ -6,6 +6,29 @@ keywords: Ultralytics, Android App, real-time object detection, YOLO models, Ten # Ultralytics Android App: Real-time Object Detection with YOLO Models + + Ultralytics HUB preview image +
+
+ Ultralytics GitHub + + Ultralytics LinkedIn + + Ultralytics Twitter + + Ultralytics YouTube + + Ultralytics TikTok + + Ultralytics Instagram + + Ultralytics Discord +
+
+ +   +
+ The Ultralytics Android App is a powerful tool that allows you to run YOLO models directly on your Android device for real-time object detection. This app utilizes TensorFlow Lite for model optimization and various hardware delegates for acceleration, enabling fast and efficient object detection. ## Quantization and Acceleration diff --git a/docs/hub/app/index.md b/docs/hub/app/index.md index 581125fad42..0f12bacc343 100644 --- a/docs/hub/app/index.md +++ b/docs/hub/app/index.md @@ -10,29 +10,25 @@ keywords: Ultralytics, HUB App, YOLOv5, YOLOv8, mobile AI, real-time object dete Ultralytics HUB preview image
- - - - - - - - - - - - - - - - - + Ultralytics GitHub + + Ultralytics LinkedIn + + Ultralytics Twitter + + Ultralytics YouTube + + Ultralytics TikTok + + Ultralytics Instagram + + Ultralytics Discord

- -   + +  
Welcome to the Ultralytics HUB App! We are excited to introduce this powerful mobile app that allows you to run YOLOv5 and YOLOv8 models directly on your [iOS](https://apps.apple.com/xk/app/ultralytics/id1583935240) and [Android](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app) devices. With the HUB App, you can utilize hardware acceleration features like Apple's Neural Engine (ANE) or Android GPU and Neural Network API (NNAPI) delegates to achieve impressive performance on your mobile device. diff --git a/docs/hub/app/ios.md b/docs/hub/app/ios.md index c202eec8b4d..82a4e956c90 100644 --- a/docs/hub/app/ios.md +++ b/docs/hub/app/ios.md @@ -6,6 +6,29 @@ keywords: Ultralytics, iOS app, object detection, YOLO models, real time, Apple # Ultralytics iOS App: Real-time Object Detection with YOLO Models + + Ultralytics HUB preview image +
+
+ Ultralytics GitHub + + Ultralytics LinkedIn + + Ultralytics Twitter + + Ultralytics YouTube + + Ultralytics TikTok + + Ultralytics Instagram + + Ultralytics Discord +
+
+ + +
+ The Ultralytics iOS App is a powerful tool that allows you to run YOLO models directly on your iPhone or iPad for real-time object detection. This app utilizes the Apple Neural Engine and Core ML for model optimization and acceleration, enabling fast and efficient object detection. ## Quantization and Acceleration diff --git a/docs/hub/index.md b/docs/hub/index.md index 6f428602003..01ab107552e 100644 --- a/docs/hub/index.md +++ b/docs/hub/index.md @@ -9,14 +9,27 @@ keywords: Ultralytics HUB, YOLOv5, YOLOv8, model training, model deployment, pre Ultralytics HUB preview image
-
+ Ultralytics GitHub + + Ultralytics LinkedIn + + Ultralytics Twitter + + Ultralytics YouTube + + Ultralytics TikTok + + Ultralytics Instagram + + Ultralytics Discord +
+
CI CPU Open In Colab
-
👋 Hello from the [Ultralytics](https://ultralytics.com/) Team! We've been working hard these last few months to launch [Ultralytics HUB](https://bit.ly/ultralytics_hub), a new web tool for training and deploying all your YOLOv5 and YOLOv8 🚀 models from one spot! diff --git a/docs/index.md b/docs/index.md index f38aa02619f..5126e78add5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,6 +9,21 @@ keywords: Ultralytics, YOLOv8, object detection, image segmentation, machine lea Ultralytics YOLO banner

+ Ultralytics GitHub + + Ultralytics LinkedIn + + Ultralytics Twitter + + Ultralytics YouTube + + Ultralytics TikTok + + Ultralytics Instagram + + Ultralytics Discord +
+
Ultralytics CI Ultralytics Code Coverage YOLOv8 Citation diff --git a/docs/modes/predict.md b/docs/modes/predict.md index 7c928169237..6a1b62cc3cb 100644 --- a/docs/modes/predict.md +++ b/docs/modes/predict.md @@ -641,6 +641,33 @@ You can use the `plot()` method of a `Result` objects to visualize predictions. | `masks` | `bool` | Whether to plot the masks. | `True` | | `probs` | `bool` | Whether to plot classification probability | `True` | +## Thread-Safe Inference + +Ensuring thread safety during inference is crucial when you are running multiple YOLO models in parallel across different threads. Thread-safe inference guarantees that each thread's predictions are isolated and do not interfere with one another, avoiding race conditions and ensuring consistent and reliable outputs. + +When using YOLO models in a multi-threaded application, it's important to instantiate separate model objects for each thread or employ thread-local storage to prevent conflicts: + +!!! example "Thread-Safe Inference" + + Instantiate a single model inside each thread for thread-safe inference: + ```python + from ultralytics import YOLO + from threading import Thread + + def thread_safe_predict(image_path): + # Instantiate a new model inside the thread + local_model = YOLO("yolov8n.pt") + results = local_model.predict(image_path) + # Process results + + + # Starting threads that each have their own model instance + Thread(target=thread_safe_predict, args=("image1.jpg",)).start() + Thread(target=thread_safe_predict, args=("image2.jpg",)).start() + ``` + +For an in-depth look at thread-safe inference with YOLO models and step-by-step instructions, please refer to our [YOLO Thread-Safe Inference Guide](../guides/yolo-thread-safe-inference.md). This guide will provide you with all the necessary information to avoid common pitfalls and ensure that your multi-threaded inference runs smoothly. 
+ ## Streaming Source `for`-loop Here's a Python script using OpenCV (`cv2`) and YOLOv8 to run inference on video frames. This script assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). diff --git a/docs/quickstart.md b/docs/quickstart.md index 0d7f71a015b..3baed00df8a 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -118,17 +118,6 @@ Ultralytics provides various installation methods including pip, conda, and Dock See the `ultralytics` [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) file for a list of dependencies. Note that all examples above install all required dependencies. -

-
- -
- Watch: Ultralytics YOLO for Object Detection: Quickstart Guide for Installation and Setup. -

- !!! tip "Tip" PyTorch requirements vary by operating system and CUDA requirements, so it's recommended to install PyTorch first following instructions at [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). diff --git a/docs/usage/cli.md b/docs/usage/cli.md index 550d469bf2d..d33156c1b29 100644 --- a/docs/usage/cli.md +++ b/docs/usage/cli.md @@ -8,6 +8,17 @@ keywords: Ultralytics, YOLO, CLI, train, validation, prediction, command line in The YOLO command line interface (CLI) allows for simple single-line commands without the need for a Python environment. CLI requires no customization or Python code. You can simply run all tasks from the terminal with the `yolo` command. +

+
+ +
+ Watch: Mastering Ultralytics YOLOv8: CLI & Python Usage and Live Inference +

+ !!! example === "Syntax" diff --git a/mkdocs.yml b/mkdocs.yml index bbf58e10696..09369f388ab 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -215,7 +215,9 @@ nav: - Guides: - guides/index.md - YOLO Common Issues: guides/yolo-common-issues.md - - Performance Metrics: guides/yolo-performance-metrics.md + - YOLO Performance Metrics: guides/yolo-performance-metrics.md + - YOLO Thread-Safe Inference: guides/yolo-thread-safe-inference.md + - Model Deployment Options: guides/model-deployment-options.md - K-Fold Cross Validation: guides/kfold-cross-validation.md - Hyperparameter Tuning: guides/hyperparameter-tuning.md - SAHI Tiled Inference: guides/sahi-tiled-inference.md diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 6859d967103..726e11044c8 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.207' +__version__ = '8.0.208' from ultralytics.models import RTDETR, SAM, YOLO from ultralytics.models.fastsam import FastSAM diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 68876e67574..aa0e14e5daa 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -300,7 +300,7 @@ def check_det_dataset(dataset, autodownload=True): data[k] = [str((path / x).resolve()) for x in data[k]] # Parse YAML - train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) + val, s = (data.get(x) for x in ('val', 'download')) if val: val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path if not all(x.exists() for x in val): diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index 367efc694df..f41e21a95ee 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -28,6 +28,7 @@ yolov8n_paddle_model # PaddlePaddle """ import platform +import threading from pathlib import Path import cv2 @@ -106,6 +107,7 @@ def __init__(self, cfg=DEFAULT_CFG, 
overrides=None, _callbacks=None): self.transforms = None self.callbacks = _callbacks or callbacks.get_default_callbacks() self.txt_path = None + self._lock = threading.Lock() # for automatic thread-safe inference callbacks.add_integration_callbacks(self) def preprocess(self, im): @@ -231,64 +233,66 @@ def stream_inference(self, source=None, model=None, *args, **kwargs): if not self.model: self.setup_model(model) - # Setup source every time predict is called - self.setup_source(source if source is not None else self.args.source) - - # Check if save_dir/ label file exists - if self.args.save or self.args.save_txt: - (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) - - # Warmup model - if not self.done_warmup: - self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz)) - self.done_warmup = True - - self.seen, self.windows, self.batch, profilers = 0, [], None, (ops.Profile(), ops.Profile(), ops.Profile()) - self.run_callbacks('on_predict_start') - for batch in self.dataset: - self.run_callbacks('on_predict_batch_start') - self.batch = batch - path, im0s, vid_cap, s = batch - - # Preprocess - with profilers[0]: - im = self.preprocess(im0s) - - # Inference - with profilers[1]: - preds = self.inference(im, *args, **kwargs) - - # Postprocess - with profilers[2]: - self.results = self.postprocess(preds, im, im0s) - self.run_callbacks('on_predict_postprocess_end') - - # Visualize, save, write results - n = len(im0s) - for i in range(n): - self.seen += 1 - self.results[i].speed = { - 'preprocess': profilers[0].dt * 1E3 / n, - 'inference': profilers[1].dt * 1E3 / n, - 'postprocess': profilers[2].dt * 1E3 / n} - p, im0 = path[i], None if self.source_type.tensor else im0s[i].copy() - p = Path(p) - - if self.args.verbose or self.args.save or self.args.save_txt or self.args.show: - s += self.write_results(i, self.results, (p, im, im0)) - if self.args.save or self.args.save_txt: - 
self.results[i].save_dir = self.save_dir.__str__() - if self.args.show and self.plotted_img is not None: - self.show(p) - if self.args.save and self.plotted_img is not None: - self.save_preds(vid_cap, i, str(self.save_dir / p.name)) - - self.run_callbacks('on_predict_batch_end') - yield from self.results - - # Print time (inference-only) - if self.args.verbose: - LOGGER.info(f'{s}{profilers[1].dt * 1E3:.1f}ms') + with self._lock: # for thread-safe inference + # Setup source every time predict is called + self.setup_source(source if source is not None else self.args.source) + + # Check if save_dir/ label file exists + if self.args.save or self.args.save_txt: + (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) + + # Warmup model + if not self.done_warmup: + self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz)) + self.done_warmup = True + + self.seen, self.windows, self.batch, profilers = 0, [], None, (ops.Profile(), ops.Profile(), ops.Profile()) + self.run_callbacks('on_predict_start') + + for batch in self.dataset: + self.run_callbacks('on_predict_batch_start') + self.batch = batch + path, im0s, vid_cap, s = batch + + # Preprocess + with profilers[0]: + im = self.preprocess(im0s) + + # Inference + with profilers[1]: + preds = self.inference(im, *args, **kwargs) + + # Postprocess + with profilers[2]: + self.results = self.postprocess(preds, im, im0s) + + self.run_callbacks('on_predict_postprocess_end') + # Visualize, save, write results + n = len(im0s) + for i in range(n): + self.seen += 1 + self.results[i].speed = { + 'preprocess': profilers[0].dt * 1E3 / n, + 'inference': profilers[1].dt * 1E3 / n, + 'postprocess': profilers[2].dt * 1E3 / n} + p, im0 = path[i], None if self.source_type.tensor else im0s[i].copy() + p = Path(p) + + if self.args.verbose or self.args.save or self.args.save_txt or self.args.show: + s += self.write_results(i, self.results, (p, im, im0)) 
+ if self.args.save or self.args.save_txt: + self.results[i].save_dir = self.save_dir.__str__() + if self.args.show and self.plotted_img is not None: + self.show(p) + if self.args.save and self.plotted_img is not None: + self.save_preds(vid_cap, i, str(self.save_dir / p.name)) + + self.run_callbacks('on_predict_batch_end') + yield from self.results + + # Print time (inference-only) + if self.args.verbose: + LOGGER.info(f'{s}{profilers[1].dt * 1E3:.1f}ms') # Release assets if isinstance(self.vid_writer[-1], cv2.VideoWriter):