-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
276 additions
and
43 deletions.
There are no files selected for viewing
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import numpy | ||
from PIL import Image | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import torch | ||
from transformers import AutoModel | ||
from numpy.linalg import norm | ||
import json | ||
|
||
|
||
def sort(d):
    """Return a new dict holding the entries of ``d`` ordered by value, largest first.

    Entries with equal values keep the relative order they had in ``d``.
    """
    ranked = list(d.items())
    # Order the (key, value) pairs in place by their value, highest first.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return dict(ranked)
|
||
# Ask the user for a free-text description of what they are looking for.
intent = input('What do you like to wear?\n')  # "I like a dress with wide neckline"


def cos_sim(a, b):
    """Return the cosine similarity of ``a`` and ``b``: their product over the product of their norms."""
    return (a @ b.T) / (norm(a) * norm(b))


model = AutoModel.from_pretrained(
    'jinaai/jina-embeddings-v2-base-de',
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# Stored embeddings, later iterated key by key and converted to numpy arrays.
with open("fashionpedia-embeddings.json", 'r') as embeddings_file:
    data = json.load(embeddings_file)

# Lookup table used later to turn an image name into the file name of its picture.
with open('../image-filename-mappings.json', 'r') as mappings_file:
    filename_of = json.load(mappings_file)
|
||
# The query text is identical for every stored embedding, so encode it once
# up front instead of once per loop iteration.
intent_embedding = model.encode(intent)

# Maps every embedding key to its cosine similarity with the query.
image_cos = {}
# Start below any value a comparison can reach so that the best match is still
# recorded when every similarity is zero or negative.
top_cos_sin_value = float("-inf")
top_image = ""
for image, stored_embedding in data.items():
    v = cos_sim(intent_embedding, numpy.array(stored_embedding))
    image_cos[image] = v
    if v > top_cos_sin_value:
        top_cos_sin_value = v
        top_image = image

# Show all similarities, best first.
print(sort(image_cos))
# print(f"Top Image: {top_image} with Cosine Similarity: {top_cos_sin_value}") | ||
# top_image_filename = filename_of[top_image[:-2]] | ||
# | ||
# img = np.asarray(Image.open(f"images/{top_image_filename}")) | ||
# plt.imshow(img) | ||
# plt.show() | ||
|
||
# Number of distinct pictures shown to the user before the loop stops.
MAX_IMAGES_SHOWN = 4

seen_set = set()  # file names that were already displayed
pos_set = set()   # image names the user answered "y" for
neg_set = set()   # image names the user answered "n" for
for image in sort(image_cos):
    # Keys presumably look like "<image name>_<counter>" (confirm against the
    # script that wrote the embeddings). Splitting on the last underscore also
    # works for counters with more than one digit, which a fixed two-character
    # slice does not.
    image_name = image.rsplit("_", 1)[0]
    filename = filename_of[image_name]
    if filename in seen_set:
        # Several keys can map to the same picture; show each picture once.
        continue
    seen_set.add(filename)

    img = np.asarray(Image.open(f"images/{filename}"))
    plt.imshow(img)
    plt.show()

    feedback = input('Does this image contain something that fit your preferences? (y/n)\n')
    if feedback == "y":
        pos_set.add(image_name)
    elif feedback == "n":
        neg_set.add(image_name)
    else:
        print('Neutral selected')

    if len(seen_set) >= MAX_IMAGES_SHOWN:
        break
|
||
# Persist the collected feedback as the learning problem.
with open("lp.json", 'w') as f:
    # The json module cannot serialise set objects (json.dump raises TypeError),
    # so write each set as a sorted list; sorting keeps the file stable between runs.
    json.dump({"pos": sorted(pos_set), "neg": sorted(neg_set)}, f)
|
||
# print(f"Positive examples: {pos_set}") | ||
# print(f"Negative examples: {neg_set}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,30 @@ | ||
import csv | ||
from openai import OpenAI | ||
from owlapy.iri import IRI | ||
from owlapy.owl_ontology_manager import OntologyManager | ||
from owlapy.owl_property import OWLDataProperty | ||
from owlapy.owl_reasoner import OntologyReasoner, FastInstanceCheckerReasoner | ||
import torch | ||
from transformers import AutoModel | ||
from numpy.linalg import norm | ||
import json | ||
|
||
# Load the ontology and build the reasoner used by the loops below.
manager = OntologyManager() | ||
# NOTE(review): `ontology` is assigned twice in a row, so only the result of the
# second load_ontology call is used afterwards. The two lines look like the
# removed and added side of the same diff hunk - confirm which one is current.
ontology = manager.load_ontology(IRI.create("file://../fashionpedia-second-generation.owl")) | ||
ontology = manager.load_ontology(IRI.create("file://fashionpedia-third-generation.owl")) | ||
base_reasoner = OntologyReasoner(ontology) | ||
reasoner = FastInstanceCheckerReasoner(base_reasoner=base_reasoner, ontology=ontology) | ||
# NOTE(review): `has_description` and `dprop2` wrap the same IRI
# (http://example.org/hasDescription); presumably old and new name of one property.
has_description = OWLDataProperty(IRI.create("http://example.org/hasDescription")) | ||
dprop2 = OWLDataProperty(IRI.create("http://example.org/hasDescription")) | ||
# Data property read for the LLM-generated description of an individual.
dprop3 = OWLDataProperty(IRI.create("http://example.org/hasLLMDescription")) | ||
|
||
def cos_sim(a, b):
    """Return the cosine similarity of ``a`` and ``b``: their product over the product of their norms."""
    return (a @ b.T) / (norm(a) * norm(b))


model = AutoModel.from_pretrained(
    'jinaai/jina-embeddings-v2-base-de',
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# One entry per (individual, description) pair. The key is the last path
# segment of the individual's IRI followed by "_<n>", n counting from 1.
embeddings_final = {}
for image in ontology.individuals_in_signature():
    image_name = image.str.split("/")[-1]
    literals = list(reasoner.data_property_values(image, has_description))
    for desc_counter, description in enumerate(literals, start=1):
        vector = model.encode(description.get_literal())
        embeddings_final[f"{image_name}_{desc_counter}"] = vector.tolist()

# Write all embeddings out as a single JSON document.
with open("../fashionpedia-embeddingsss", 'w') as out_file:
    json.dump(embeddings_final, out_file)
# Append one CSV row (IRI, embedding) per individual of the ontology. The
# embedding is requested from the remote endpoint for the concatenation of all
# hasDescription values and the (truncated) hasLLMDescription value.
with open('output.csv', mode='a', newline='') as file:
    writer = csv.writer(file)
    # NOTE(review): endpoint URL and API token are hard-coded; consider reading
    # them from the environment or a config file.
    client = OpenAI(base_url="http://tentris-ml.cs.upb.de:8502/v1", api_key="token-tentris-upb")
    count = 0
    for image_ind in ontology.individuals_in_signature():
        image_iri = image_ind.str

        llm_literals = list(reasoner.data_property_values(image_ind, dprop3))
        if not llm_literals:
            # Without this guard the [0] lookup below raises a bare IndexError
            # and aborts the export part-way through the append-mode file.
            print(f"{image_iri}: no hasLLMDescription value, skipped")
            continue
        # Keep at most the first 4000 characters; slicing a shorter string is a no-op.
        llm_description = str(llm_literals[0].get_literal())[:4000]

        # Every description on its own line.
        all_descriptions = "".join(
            d.get_literal() + "\n" for d in reasoner.data_property_values(image_ind, dprop2)
        )

        responses = client.embeddings.create(input=[all_descriptions + "\n " + llm_description], model="tentris")
        writer.writerow([image_iri, responses.data[0].embedding])
        count += 1
        # NOTE(review): the total 45,623 is hard-coded in the progress message.
        print(f"{image_iri}: {count:,}/45,623")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import numpy as np | ||
from sklearn.feature_extraction.text import TfidfVectorizer | ||
from scipy import sparse | ||
import rdflib | ||
|
||
|
||
class BM25(object):
    """BM25 scorer that reuses the vocabulary and IDF weights of a ``TfidfVectorizer``.

    ``b`` and ``k1`` are the two free parameters of the score computed in
    :meth:`transform`.
    """

    def __init__(self, b=0.75, k1=1.6):
        # norm=None and smooth_idf=False: the fitted idf_ values are used
        # directly in transform() after subtracting 1.
        self.vectorizer = TfidfVectorizer(norm=None, smooth_idf=False)
        self.b = b
        self.k1 = k1

    def _term_counts(self, texts):
        """Apply the CountVectorizer transform (skipping the TF-IDF weighting) to ``texts``."""
        return super(TfidfVectorizer, self.vectorizer).transform(texts)

    def fit(self, X):
        """ Fit IDF to documents X and remember the mean row sum of their count matrix """
        self.vectorizer.fit(X)
        counts = self._term_counts(X)
        self.avdl = counts.sum(1).mean()

    def transform(self, q, X):
        """ Calculate BM25 between query q and documents X; returns one score per document """
        b, k1, avdl = self.b, self.k1, self.avdl

        # Term counts and per-document row sums.
        doc_counts = self._term_counts(X)
        doc_lengths = doc_counts.sum(1).A1
        query_row, = self._term_counts([q])
        assert sparse.isspmatrix_csr(query_row)
        term_ids = query_row.indices

        # Convert to csc for better column slicing; keep only the query's term columns.
        tf = doc_counts.tocsc()[:, term_ids]
        length_norm = k1 * (1 - b + b * doc_lengths / avdl)
        denom = tf + length_norm[:, None]
        # sklearn stores idf(t) = log [ n / df(t) ] + 1, so subtract 1 to get
        # idf(t) = log [ n / df(t) ].
        idf = self.vectorizer._tfidf.idf_[None, term_ids] - 1.
        numer = tf.multiply(np.broadcast_to(idf, tf.shape)) * (k1 + 1)
        return (numer / denom).sum(1).A1
|
||
|
||
# ------------ End of library impl. What follows is an example ----------------- | ||
from sklearn.datasets import fetch_20newsgroups | ||
|
||
# documents = fetch_20newsgroups(subset='train').data | ||
g = rdflib.Graph()
g.parse("fashionpedia-third-generation.owl", format="xml")

# Build one text document per subject: the subject itself on the first line,
# followed by every object value attached to it (predicates are not included).
documents = []
# Graph.subjects() yields a subject once per matching triple, so a resource
# with k triples would otherwise be indexed k times. dict.fromkeys removes the
# repeats while keeping first-seen order.
for subject in dict.fromkeys(g.subjects()):
    lines = [f"{str(subject)} \n"]
    lines.extend(f"{obj} \n" for predicate, obj in g.predicate_objects(subject))
    documents.append("".join(lines))
|
||
bm25 = BM25()
bm25.fit(documents)
# Find the similar documents given query
query = "What elegant and traditional bridal gowns are available?"
target_iri = "http://example.org/image_47178"
scores = bm25.transform(query, documents)

# Each document starts with its subject, so the first whitespace-separated
# token is the key under which its score is stored.
storage = {doc.split()[0]: score for doc, score in zip(documents, scores)}

# Highest score first.
ranking = sorted(storage.items(), key=lambda item: item[1], reverse=True)
storage_sorted = dict(ranking)

# Zero-based rank of the target; raises ValueError when the target was not indexed.
placement = list(storage_sorted).index(target_iri)

print(f"target_iri placement: {placement} \n target_iri score: {storage_sorted[target_iri]}")
# Report only the best entry; the line is labelled "Top scorer" but used to
# print the complete ranking.
print(f" Top scorer: {ranking[0]}")
|
||
# print(documents[np.argmax(scores)]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from openai import OpenAI | ||
import pandas as pd | ||
import numpy as np | ||
from numpy.linalg import norm | ||
|
||
query = 'I like a dress with wide neckline'

# First CSV column is the index; the remaining columns are used as the document matrix.
df = pd.read_csv("embeddings_short2.csv", index_col=0, nrows=None)
iris = df.index.values.tolist()

client = OpenAI(base_url="http://tentris-ml.cs.upb.de:8502/v1", api_key="token-tentris-upb")

docs = np.array(df.values)
# Embed the query with the same endpoint.
query_response = client.embeddings.create(input=[query], model="tentris")
qr = np.array(query_response.data[0].embedding)

# Scale every document row and the query to unit length, so that their dot
# products are cosine similarities.
row_lengths = norm(docs, axis=1, keepdims=True)
docs_norms = docs / row_lengths
qr_norms = qr / norm(qr)

cosine_similarities = (docs_norms @ qr_norms).flatten()

best_match_index = cosine_similarities.argmax()
best_similarity = cosine_similarities[best_match_index]

print(cosine_similarities)
print(f"The best scoring image is the image with iri: {iris[best_match_index]} and score: {best_similarity}")
Large diffs are not rendered by default.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
82 changes: 82 additions & 0 deletions
82
local_working_directory/recommendation_script2_first_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import json | ||
from PIL import Image | ||
|
||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
from owlapy.class_expression import OWLObjectHasValue | ||
from owlapy.iri import IRI | ||
from owlapy.owl_individual import OWLNamedIndividual | ||
from ontolearn.knowledge_base import KnowledgeBase | ||
from ontolearn.concept_learner import EvoLearner, CELOE | ||
from ontolearn.learning_problem import PosNegLPStandard | ||
from owlapy.owl_property import OWLObjectProperty, OWLDataProperty | ||
|
||
kb = KnowledgeBase(path="../fashionpedia-first-generation.owl") | ||
|
||
|
||
def add_namespace(ind):
    """Return ``ind`` prefixed with the ``http://example.org/`` namespace."""
    base_iri = "http://example.org/"
    return base_iri + ind
|
||
|
||
pos = {'image_21007': "Contains dress from supercategory 'wholebody' with the following attributes: a-line of supercategory 'silhouette', mini (length) of supercategory 'length'", 'image_45433': "Contains dress from supercategory 'wholebody' with the following attributes: normal waist of supercategory 'waistline', gown of supercategory 'nickname', trumpet of supercategory 'silhouette', asymmetrical of supercategory 'silhouette', lining of supercategory 'textile finishing, manufacturing techniques', maxi (length) of supercategory 'length', plastic of supercategory 'non-textile material type', zip-up of supercategory 'opening type'", 'image_19012': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'", 'image_42863': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'", 'image_8113': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'", 'image_26892': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'", 'image_32017': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'", 'image_42632': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'"} | ||
|
||
neg = {'image_34250': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'", 'image_20344': "Contains neckline from supercategory 'garment parts' with the following attributes: straight across (neck) of supercategory 'neckline type'"} | ||
|
||
with open('../image-filename-mappings.json', 'r') as mapping_file:
    filename_of = json.load(mapping_file)

# Properties used to walk from an image to its annotations, categories and attributes.
oprop = OWLObjectProperty(IRI.create("http://example.org/hasImage"))
oprop2 = OWLObjectProperty(IRI.create("http://example.org/hasCategory"))
oprop3 = OWLObjectProperty(IRI.create("http://example.org/hasAttribute"))
dprop4 = OWLDataProperty(IRI.create("http://example.org/hasName"))
dprop5 = OWLDataProperty(IRI.create("http://example.org/hasSupercategory"))

# Annotation individuals whose rebuilt description occurs in the pos / neg text of their image.
typed_pos = set()
typed_neg = set()

for i in set(pos).union(neg):
    ind = OWLNamedIndividual(add_namespace(i))
    final_description = ""
    # Every individual that points at this image through hasImage.
    for annotation in kb.individuals(OWLObjectHasValue(oprop, ind)):
        cat_ind = list(kb.get_object_property_values(annotation, oprop2))[0]
        cat_name = list(kb.get_data_property_values(cat_ind, dprop4))[0].get_literal()
        cat_supercat = list(kb.get_data_property_values(cat_ind, dprop5))[0].get_literal()
        final_description = f"Contains {cat_name} from supercategory '{cat_supercat}'"

        attr_parts = []
        for attr in kb.get_object_property_values(annotation, oprop3):
            attr_name = list(kb.get_data_property_values(attr, dprop4))[0].get_literal()
            attr_supercat = list(kb.get_data_property_values(attr, dprop5))[0].get_literal()
            attr_parts.append(f"{attr_name} of supercategory '{attr_supercat}'")
        if attr_parts:
            final_description += " with the following attributes: " + ", ".join(attr_parts)

        # NOTE(review): nesting was reconstructed from a listing without
        # indentation; the match check is assumed to run once per annotation - confirm.
        # `in` on the stored text is a substring test, not an equality test.
        if i in pos and final_description in pos[i]:
            typed_pos.add(annotation)
        elif i in neg and final_description in neg[i]:
            typed_neg.add(annotation)
|
||
print(typed_pos)
print(typed_neg)
# Learning problem built from the collected positive and negative individuals.
lp = PosNegLPStandard(pos=typed_pos, neg=typed_neg)

model = CELOE(knowledge_base=kb, max_runtime=120)
model.fit(lp, verbose=False)

hypotheses = model.best_hypotheses(n=1)
print(hypotheses)

# Display the pictures of the first ten individuals returned for the learned hypothesis.
matching_individuals = list(kb.individuals(hypotheses))
for ind in matching_individuals[:10]:
    print(ind)
    filename = filename_of[ind.iri.reminder]
    picture = Image.open(f'../images/{filename}')
    img = np.asarray(picture)
    plt.imshow(img)
    plt.show()
# NOTE(review): the indentation of this listing is lost, so it cannot be told
# whether the statements below run once or once per displayed image - confirm.
# Prints the literal ":gdf"; the except branch prints any exception raised by that print.
try: | ||
print(":gdf") | ||
except Exception as e: | ||
print(e) | ||
# Overwrites questions.json with a single fixed key/value pair; this looks like
# placeholder content - TODO confirm the intended payload.
with open("questions.json", "w") as outfile: | ||
json.dump({"dsad": "dasd"}, outfile) |