Mikubill · huchenlei · Nov 13, 2023 · Nov 12, 2023 · Nov 13, 2023 · Nov 13, 2023
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -48,17 +48,26 @@ jobs:
         uses: actions/cache@v3
         with:
           path: stable-diffusion-webui/extensions/sd-webui-controlnet/models/
-          key: controlnet-models-v1
+          key: controlnet-models-v2
       - name: Cache Preprocessor models
         uses: actions/cache@v3
         with:
           path: stable-diffusion-webui/extensions/sd-webui-controlnet/annotator/downloads/
           key: preprocessor-models-v1
       - name: Download controlnet model for testing
         run: |
-          if [ ! -f "extensions/sd-webui-controlnet/models/control_v11p_sd15_canny.pth" ]; then
-            curl -Lo extensions/sd-webui-controlnet/models/control_v11p_sd15_canny.pth https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_canny.pth
-          fi
+          declare -a urls=(
+            "https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_canny.pth"
+            "https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-full-face_sd15.safetensors"
+            "https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-plus-face_sd15.safetensors"
+          )
+
+          for url in "${urls[@]}"; do
+            filename="extensions/sd-webui-controlnet/models/${url##*/}"  # Local file name = last path segment of the URL
+            if [ ! -f "$filename" ]; then
+              curl --fail -Lo "$filename" "$url"  # --fail: an HTTP error page must not be saved (and cached) as a model file
+            fi
+          done
         working-directory: stable-diffusion-webui
       - name: Start test server
         run: >
@@ -94,10 +103,10 @@ jobs:
         if: always()
         with:
           name: output
-          path: output.txt
+          path: stable-diffusion-webui/output.txt
       - name: Upload coverage HTML
         uses: actions/upload-artifact@v3
         if: always()
         with:
           name: htmlcov
-          path: htmlcov
+          path: stable-diffusion-webui/htmlcov
diff --git a/scripts/controlmodel_ipadapter.py b/scripts/controlmodel_ipadapter.py
@@ -7,6 +7,20 @@
 SD_V12_CHANNELS = [320] * 4 + [640] * 4 + [1280] * 4 + [1280] * 6 + [640] * 6 + [320] * 6 + [1280] * 2
 SD_XL_CHANNELS = [640] * 8 + [1280] * 40 + [1280] * 60 + [640] * 12 + [1280] * 20
 
+class MLPProjModel(torch.nn.Module):
+    """Project image embeddings with Linear -> GELU -> Linear -> LayerNorm."""
+    def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024):
+        super().__init__()
+        layers = [
+            torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim),
+            torch.nn.GELU(),
+            torch.nn.Linear(clip_embeddings_dim, cross_attention_dim),
+            torch.nn.LayerNorm(cross_attention_dim),
+        ]
+        self.proj = torch.nn.Sequential(*layers)  # layer positions name the parameters ("proj.0.weight", ...)
+
+    def forward(self, image_embeds):
+        return self.proj(image_embeds)
 
 class ImageProjModel(torch.nn.Module):
     """Projection Model"""
@@ -158,27 +172,34 @@ def forward(self, x):
 
 
 class IPAdapterModel(torch.nn.Module):
-    def __init__(self, state_dict, clip_embeddings_dim, cross_attention_dim, is_plus, sdxl_plus):
+    def __init__(self, state_dict, clip_embeddings_dim, cross_attention_dim, is_plus, sdxl_plus, is_full):
         super().__init__()
         self.device = "cpu"
 
         self.cross_attention_dim = cross_attention_dim
         self.is_plus = is_plus
         self.sdxl_plus = sdxl_plus
+        self.is_full = is_full
 
         if self.is_plus:
-            self.clip_extra_context_tokens = 16
-
-            self.image_proj_model = Resampler(
-                dim=1280 if sdxl_plus else cross_attention_dim,
-                depth=4,
-                dim_head=64,
-                heads=20 if sdxl_plus else 12,
-                num_queries=self.clip_extra_context_tokens,
-                embedding_dim=clip_embeddings_dim,
-                output_dim=self.cross_attention_dim,
-                ff_mult=4
-            )
+            if self.is_full:
+                self.image_proj_model = MLPProjModel(
+                    cross_attention_dim=cross_attention_dim,
+                    clip_embeddings_dim=clip_embeddings_dim
+                )
+            else:
+                self.clip_extra_context_tokens = 16
+
+                self.image_proj_model = Resampler(
+                    dim=1280 if sdxl_plus else cross_attention_dim,
+                    depth=4,
+                    dim_head=64,
+                    heads=20 if sdxl_plus else 12,
+                    num_queries=self.clip_extra_context_tokens,
+                    embedding_dim=clip_embeddings_dim,
+                    output_dim=self.cross_attention_dim,
+                    ff_mult=4
+                )
         else:
             self.clip_extra_context_tokens = state_dict["image_proj"]["proj.weight"].shape[0] // self.cross_attention_dim
 
@@ -294,14 +315,17 @@ def clear_all_ip_adapter():
 class PlugableIPAdapter(torch.nn.Module):
     def __init__(self, state_dict):
         super().__init__()
-        self.is_plus = "latents" in state_dict["image_proj"]
+        self.is_full = "proj.0.weight" in state_dict['image_proj']
+        self.is_plus = self.is_full or "latents" in state_dict["image_proj"]
         cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[1]
         self.sdxl = cross_attention_dim == 2048
         self.sdxl_plus = self.sdxl and self.is_plus
 
         if self.is_plus:
             if self.sdxl_plus:
                 clip_embeddings_dim = int(state_dict["image_proj"]["latents"].shape[2])
+            elif self.is_full:
+                clip_embeddings_dim = int(state_dict["image_proj"]["proj.0.weight"].shape[1])
             else:
                 clip_embeddings_dim = int(state_dict['image_proj']['proj_in.weight'].shape[1])
         else:
@@ -311,7 +335,8 @@ def __init__(self, state_dict):
                                         clip_embeddings_dim=clip_embeddings_dim,
                                         cross_attention_dim=cross_attention_dim,
                                         is_plus=self.is_plus,
-                                        sdxl_plus=self.sdxl_plus)
+                                        sdxl_plus=self.sdxl_plus,
+                                        is_full=self.is_full)
         self.disable_memory_management = True
         self.dtype = None
         self.weight = 1.0

diff --git a/tests/web_api/txt2img_test.py b/tests/web_api/txt2img_test.py
@@ -8,9 +8,9 @@
 
 class TestAlwaysonTxt2ImgWorking(unittest.TestCase):
     def setUp(self):
-        sd_version = StableDiffusionVersion(int(
+        self.sd_version = StableDiffusionVersion(int(
             os.environ.get("CONTROLNET_TEST_SD_VERSION", StableDiffusionVersion.SD1x.value)))
-        self.model = utils.get_model("canny", sd_version)
+        self.model = utils.get_model("canny", self.sd_version)
 
         controlnet_unit = {
             "enabled": True,
@@ -167,7 +167,48 @@ def test_save_detected_map(self):
 
                 resp = requests.post(self.url_txt2img, json=self.simple_txt2img).json()
                 self.assertEqual(2 if save_map else 1, len(resp["images"]))
-
+
+    def test_ip_adapter_face(self):
+        """txt2img must succeed with the plus-face IP-Adapter unit; other SD versions are reported as skipped."""
+        match self.sd_version:
+            case StableDiffusionVersion.SDXL:
+                model = "ip-adapter-plus-face_sdxl_vit-h"
+                module = "ip-adapter_clip_sdxl_plus_vith"
+            case StableDiffusionVersion.SD1x:
+                model = "ip-adapter-plus-face_sd15"
+                module = "ip-adapter_clip_sd15"
+            case _:
+                self.skipTest(f"no IP-Adapter plus-face test model for {self.sd_version}")
+
+        self.simple_txt2img["alwayson_scripts"]["ControlNet"]["args"] = [
+            {
+                "input_image": utils.readImage("test/test_files/img2img_basic.png"),
+                "model": utils.get_model(model, self.sd_version),
+                "module": module,
+            }
+        ]
+
+        self.assert_status_ok()
+
+    def test_ip_adapter_fullface(self):
+        """txt2img must succeed with the full-face IP-Adapter unit; non-SD1x versions are reported as skipped."""
+        match self.sd_version:
+            case StableDiffusionVersion.SD1x:
+                model = "ip-adapter-full-face_sd15"
+                module = "ip-adapter_clip_sd15"
+            case _:
+                self.skipTest(f"no IP-Adapter full-face test model for {self.sd_version}")
+
+        self.simple_txt2img["alwayson_scripts"]["ControlNet"]["args"] = [
+            {
+                "input_image": utils.readImage("test/test_files/img2img_basic.png"),
+                "model": utils.get_model(model, self.sd_version),
+                "module": module,
+            }
+        ]
+
+        self.assert_status_ok()
+
 
 if __name__ == "__main__":
     unittest.main()