forked from dmarx/Multi-Modal-Comparators
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo.py
62 lines (47 loc) · 1.73 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import logging

import torch

from mmc.multimmc import MultiMMC
from mmc.modalities import TEXT, IMAGE
# For now at least, I'm referring to models like CLIP, CLOOB, SLIP etc. as
# "multi-modal comparators" (MMCs). The MultiMMC class is a generic wrapper
# that serves the same function as a "MultiCLIPPerceptor", but is
# intended to be sufficiently generic to be able to wrap collections of models
# that aren't all from the same family. The only constraint is that
# the individual MMCs attached to the MultiMMC must each be compatible with
# the modalities the MultiMMC supports.
# Module-level logger for the demo's debug output (the original script used
# `logger` without ever defining it).
logger = logging.getLogger(__name__)

# Wrapper that will hold every comparator loaded below; it is declared for
# the TEXT and IMAGE modalities.
perceptor = MultiMMC(TEXT, IMAGE)  # , shared_latent=True)

# OpenAI CLIP variants to attach. The original listing ended with a literal
# `...` placeholder, which would have been passed to load_model() as
# id=Ellipsis; append further model names here instead.
oa_clip_modelnames = [
    'RN50',
    'RN101',
    'ViTL64',
]

# Other architectures are meant to be loadable the same way, e.g.:
#perceptor.load_model(architecture='slip', id='some-clip-model')
#perceptor.load_model(architecture='blip', id='that-one-blip-model')

# Individual MMCs can be ascribed weights. Potential ways this could be used:
# * weighted ensemble of perceptors
# * compensate for perceptors that produce outputs at different scales
for model_name in oa_clip_modelnames:
    perceptor.load_model(
        architecture='clip',
        publisher='openai',
        id=model_name,
        #weight=1, # default
    )

# Add a model that takes 50% responsibility for the score, cause why not:
# its weight equals the number of models loaded so far, i.e. the sum of
# their weights when each of them uses the default weight of 1.
perceptor.load_model(
    architecture='cloob',
    publisher='crowsonkb',
    weight=len(perceptor.models),
)

logger.debug("loaded models: %s", list(perceptor.models.keys()))

# Sanity checks: the wrapper should report both modalities it was built for.
assert perceptor.supports_text
assert perceptor.supports_image
#assert perceptor.has_shared_latent

# The original evaluated this list and discarded it (notebook-style
# inspection); log it so the information is actually surfaced.
modality_names = [m.name for m in perceptor.modalities]
logger.debug("supported modalities: %s", modality_names)

text = ["foo bar baz"]
image = IMAGE.read_from_disk('foobar.jpg')

# Compare the prompt against the image across all loaded models. The original
# passed the undefined names `text_container` / `image_container`; the inputs
# prepared above are `text` and `image`.
multi_similarity_score = perceptor.compare(
    text=text,
    image=image,
    return_projections=False,
)