Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for IP Adapter Full Face #2252

Merged
merged 3 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,26 @@ jobs:
uses: actions/cache@v3
with:
path: stable-diffusion-webui/extensions/sd-webui-controlnet/models/
key: controlnet-models-v1
key: controlnet-models-v2
- name: Cache Preprocessor models
uses: actions/cache@v3
with:
path: stable-diffusion-webui/extensions/sd-webui-controlnet/annotator/downloads/
key: preprocessor-models-v1
- name: Download controlnet model for testing
run: |
if [ ! -f "extensions/sd-webui-controlnet/models/control_v11p_sd15_canny.pth" ]; then
curl -Lo extensions/sd-webui-controlnet/models/control_v11p_sd15_canny.pth https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_canny.pth
fi
declare -a urls=(
"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_canny.pth"
"https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-full-face_sd15.safetensors"
"https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-plus-face_sd15.safetensors"
)

for url in "${urls[@]}"; do
filename="extensions/sd-webui-controlnet/models/${url##*/}" # Extracts the last part of the URL
if [ ! -f "$filename" ]; then
curl -Lo "$filename" "$url"
fi
done
working-directory: stable-diffusion-webui
- name: Start test server
run: >
Expand Down Expand Up @@ -94,10 +103,10 @@ jobs:
if: always()
with:
name: output
path: output.txt
path: stable-diffusion-webui/output.txt
- name: Upload coverage HTML
uses: actions/upload-artifact@v3
if: always()
with:
name: htmlcov
path: htmlcov
path: stable-diffusion-webui/htmlcov
55 changes: 40 additions & 15 deletions scripts/controlmodel_ipadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@
# Flat lists of integer channel widths, built by repeating each width a fixed
# number of times. SD_V12_CHANNELS has 32 entries and SD_XL_CHANNELS has 140.
# NOTE(review): presumably one entry per patched attention layer, in model
# order -- confirm against the code that indexes these tables.
SD_V12_CHANNELS = [320] * 4 + [640] * 4 + [1280] * 4 + [1280] * 6 + [640] * 6 + [320] * 6 + [1280] * 2
SD_XL_CHANNELS = [640] * 8 + [1280] * 40 + [1280] * 60 + [640] * 12 + [1280] * 20

class MLPProjModel(torch.nn.Module):
    """Two-layer MLP that projects image embeddings to the cross-attention width.

    The projection is Linear -> GELU -> Linear -> LayerNorm and is applied to
    the last dimension of the input.

    Args:
        cross_attention_dim: Size of the last dimension of the output.
        clip_embeddings_dim: Size of the last dimension of the input; also the
            width of the hidden layer.
    """

    def __init__(self, cross_attention_dim: int = 1024, clip_embeddings_dim: int = 1024) -> None:
        super().__init__()

        # The attribute name ``proj`` and the position of each layer inside the
        # Sequential determine the state-dict keys (``proj.0.weight``, ...).
        # Checkpoint detection elsewhere in this file looks for "proj.0.weight"
        # and reads its second dimension as the embedding width, so neither the
        # name nor the layer order may change.
        self.proj = torch.nn.Sequential(
            torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim),
            torch.nn.GELU(),
            torch.nn.Linear(clip_embeddings_dim, cross_attention_dim),
            torch.nn.LayerNorm(cross_attention_dim),
        )

    def forward(self, image_embeds: torch.Tensor) -> torch.Tensor:
        """Return the projected embeddings for ``image_embeds``."""
        return self.proj(image_embeds)

class ImageProjModel(torch.nn.Module):
"""Projection Model"""
Expand Down Expand Up @@ -158,27 +172,34 @@ def forward(self, x):


class IPAdapterModel(torch.nn.Module):
def __init__(self, state_dict, clip_embeddings_dim, cross_attention_dim, is_plus, sdxl_plus):
def __init__(self, state_dict, clip_embeddings_dim, cross_attention_dim, is_plus, sdxl_plus, is_full):
super().__init__()
self.device = "cpu"

self.cross_attention_dim = cross_attention_dim
self.is_plus = is_plus
self.sdxl_plus = sdxl_plus
self.is_full = is_full

if self.is_plus:
self.clip_extra_context_tokens = 16

self.image_proj_model = Resampler(
dim=1280 if sdxl_plus else cross_attention_dim,
depth=4,
dim_head=64,
heads=20 if sdxl_plus else 12,
num_queries=self.clip_extra_context_tokens,
embedding_dim=clip_embeddings_dim,
output_dim=self.cross_attention_dim,
ff_mult=4
)
if self.is_full:
self.image_proj_model = MLPProjModel(
cross_attention_dim=cross_attention_dim,
clip_embeddings_dim=clip_embeddings_dim
)
else:
self.clip_extra_context_tokens = 16

self.image_proj_model = Resampler(
dim=1280 if sdxl_plus else cross_attention_dim,
depth=4,
dim_head=64,
heads=20 if sdxl_plus else 12,
num_queries=self.clip_extra_context_tokens,
embedding_dim=clip_embeddings_dim,
output_dim=self.cross_attention_dim,
ff_mult=4
)
else:
self.clip_extra_context_tokens = state_dict["image_proj"]["proj.weight"].shape[0] // self.cross_attention_dim

Expand Down Expand Up @@ -294,14 +315,17 @@ def clear_all_ip_adapter():
class PlugableIPAdapter(torch.nn.Module):
def __init__(self, state_dict):
super().__init__()
self.is_plus = "latents" in state_dict["image_proj"]
self.is_full = "proj.0.weight" in state_dict['image_proj']
self.is_plus = self.is_full or "latents" in state_dict["image_proj"]
cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[1]
self.sdxl = cross_attention_dim == 2048
self.sdxl_plus = self.sdxl and self.is_plus

if self.is_plus:
if self.sdxl_plus:
clip_embeddings_dim = int(state_dict["image_proj"]["latents"].shape[2])
elif self.is_full:
clip_embeddings_dim = int(state_dict["image_proj"]["proj.0.weight"].shape[1])
else:
clip_embeddings_dim = int(state_dict['image_proj']['proj_in.weight'].shape[1])
else:
Expand All @@ -311,7 +335,8 @@ def __init__(self, state_dict):
clip_embeddings_dim=clip_embeddings_dim,
cross_attention_dim=cross_attention_dim,
is_plus=self.is_plus,
sdxl_plus=self.sdxl_plus)
sdxl_plus=self.sdxl_plus,
is_full=self.is_full)
self.disable_memory_management = True
self.dtype = None
self.weight = 1.0
Expand Down
47 changes: 44 additions & 3 deletions tests/web_api/txt2img_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

class TestAlwaysonTxt2ImgWorking(unittest.TestCase):
def setUp(self):
sd_version = StableDiffusionVersion(int(
self.sd_version = StableDiffusionVersion(int(
os.environ.get("CONTROLNET_TEST_SD_VERSION", StableDiffusionVersion.SD1x.value)))
self.model = utils.get_model("canny", sd_version)
self.model = utils.get_model("canny", self.sd_version)

controlnet_unit = {
"enabled": True,
Expand Down Expand Up @@ -167,7 +167,48 @@ def test_save_detected_map(self):

resp = requests.post(self.url_txt2img, json=self.simple_txt2img).json()
self.assertEqual(2 if save_map else 1, len(resp["images"]))


def test_ip_adapter_face(self):
    """Smoke-test txt2img with an IP-Adapter plus-face model.

    Picks the model/preprocessor pair for the SD version under test, replaces
    the ControlNet unit list in the shared request with a single IP-Adapter
    unit, and asserts that the request succeeds. Versions without a
    configured pair are reported as skipped rather than silently passing.
    """
    if self.sd_version == StableDiffusionVersion.SDXL:
        model = "ip-adapter-plus-face_sdxl_vit-h"
        module = "ip-adapter_clip_sdxl_plus_vith"
    elif self.sd_version == StableDiffusionVersion.SD1x:
        model = "ip-adapter-plus-face_sd15"
        module = "ip-adapter_clip_sd15"
    else:
        # skipTest raises, so nothing below runs for unsupported versions.
        self.skipTest(f"no IP-Adapter plus-face model configured for {self.sd_version}")

    self.simple_txt2img["alwayson_scripts"]["ControlNet"]["args"] = [
        {
            "input_image": utils.readImage("test/test_files/img2img_basic.png"),
            "model": utils.get_model(model, self.sd_version),
            "module": module,
        }
    ]

    self.assert_status_ok()

def test_ip_adapter_fullface(self):
    """Smoke-test txt2img with the IP-Adapter full-face model.

    Only an SD1.x model/preprocessor pair is configured here; every other
    version is reported as skipped rather than silently passing. The
    ControlNet unit list in the shared request is replaced with a single
    IP-Adapter unit and the request is asserted to succeed.
    """
    if self.sd_version != StableDiffusionVersion.SD1x:
        # skipTest raises, so nothing below runs for unsupported versions.
        self.skipTest(f"no IP-Adapter full-face model configured for {self.sd_version}")

    model = "ip-adapter-full-face_sd15"
    module = "ip-adapter_clip_sd15"

    self.simple_txt2img["alwayson_scripts"]["ControlNet"]["args"] = [
        {
            "input_image": utils.readImage("test/test_files/img2img_basic.png"),
            "model": utils.get_model(model, self.sd_version),
            "module": module,
        }
    ]

    self.assert_status_ok()


# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()