Passiolife · kells1986 · Jun 27, 2023 · Jun 27, 2023 · Jun 28, 2023
diff --git a/notebooks/export.ipynb b/notebooks/export.ipynb
diff --git a/segment_anything/build_sam.py b/segment_anything/build_sam.py
@@ -102,6 +102,6 @@ def _build_sam(
     sam.eval()
     if checkpoint is not None:
         with open(checkpoint, "rb") as f:
-            state_dict = torch.load(f)
+            state_dict = torch.load(f, map_location=None if torch.cuda.is_available() else torch.device('cpu'))
         sam.load_state_dict(state_dict)
     return sam
diff --git a/segment_anything/utils/onnx.py b/segment_anything/utils/onnx.py
@@ -13,7 +13,6 @@
 from ..modeling import Sam
 from .amg import calculate_stability_score
 
-
 class SamOnnxModel(nn.Module):
     """
     This model should not be called directly, but is used in ONNX export.
@@ -142,3 +141,65 @@ def forward(
             return upscaled_masks, scores, stability_scores, areas, masks
 
         return upscaled_masks, scores, masks
+
+
+
+
+
+
+class SimpleSamOnnxModel(SamOnnxModel):
+    """
+    This model should not be called directly, but is used in ONNX export.
+    It combines the prompt encoder, mask decoder, and mask postprocessing of Sam,
+    with some functions modified to enable model tracing. Also supports extra
+    options controlling what information. See the ONNX export script for details.
+    """
+
+    def __init__(
+        self,
+        model: Sam,
+    ) -> None:
+        super().__init__(model=model,
+                         return_single_mask=True,
+                         return_extra_metrics=False,
+                         use_stability_score=True)
+
+    @torch.no_grad()
+    def forward(
+        self,
+        x: torch.Tensor,
+        point_coords: torch.Tensor
+    ):
+        #Generate the image embeddings in the model so it works end-to-end
+        img = self.model.preprocess(x)
+        image_embeddings = self.model.image_encoder(img)
+
+        orig_im_size = torch.Tensor([x.shape[-2], x.shape[-1]]).float()
+        #Remove these from arguments for simplicity
+        mask_input_size = [4 * x for x in self.model.prompt_encoder.image_embedding_size]
+        mask_input = torch.zeros(1,1,*mask_input_size, dtype=torch.float)
+        has_mask_input = torch.tensor([0], dtype=torch.float)
+
+        #Only returning a single mask, just add all labels as 1
+        point_labels = torch.ones(1, point_coords.shape[1], dtype=torch.long)
+        sparse_embedding = self._embed_points(point_coords, point_labels)
+        dense_embedding = self._embed_masks(mask_input, has_mask_input)
+
+        masks, scores = self.model.mask_decoder.predict_masks(
+            image_embeddings=image_embeddings,
+            image_pe=self.model.prompt_encoder.get_dense_pe(),
+            sparse_prompt_embeddings=sparse_embedding,
+            dense_prompt_embeddings=dense_embedding,
+        )
+
+
+        scores = calculate_stability_score(
+            masks, self.model.mask_threshold, self.stability_score_offset
+        )
+
+        masks, scores = self.select_masks(masks, scores, point_coords.shape[1])
+
+        upscaled_masks = self.mask_postprocessing(masks, orig_im_size)
+
+        #apply Sigmoid to the masks
+        return torch.sigmoid(upscaled_masks), scores, torch.sigmoid(masks)