Skip to content

Commit

Permalink
restructuring of files
Browse files Browse the repository at this point in the history
  • Loading branch information
alkidbaci committed Nov 14, 2024
1 parent f25d35f commit 13b90f5
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions archives/embeddings-generation_old.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from owlapy.iri import IRI
from owlapy.owl_ontology_manager import OntologyManager
from owlapy.owl_property import OWLDataProperty
from owlapy.owl_reasoner import OntologyReasoner, FastInstanceCheckerReasoner
import torch
from transformers import AutoModel
from numpy.linalg import norm
import json

# Generate one text embedding per description literal attached to each
# individual of the ontology and dump them all to a single JSON file.

# Load the ontology and build a reasoner that can look up data property values.
manager = OntologyManager()
ontology = manager.load_ontology(IRI.create("file://../fashionpedia-second-generation.owl"))
base_reasoner = OntologyReasoner(ontology)
reasoner = FastInstanceCheckerReasoner(base_reasoner=base_reasoner, ontology=ontology)
has_description = OWLDataProperty(IRI.create("http://example.org/hasDescription"))


def cos_sim(a, b):
    """Return the cosine similarity ``(a @ b.T) / (norm(a) * norm(b))``.

    Not used by this script itself; kept so the module-level name stays
    available.
    """
    return (a @ b.T) / (norm(a) * norm(b))


# NOTE(review): trust_remote_code=True runs model code downloaded from the
# hub -- confirm the model revision is trusted/pinned.
model = AutoModel.from_pretrained(
    'jinaai/jina-embeddings-v2-base-de',
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# Maps "<last IRI path segment>_<1-based description index>" to the embedding
# of that description, stored as a plain list so it is JSON-serialisable.
embeddings_final = {}
for image in ontology.individuals_in_signature():
    # The last "/"-separated segment of the individual's IRI string is the key prefix.
    image_name = image.str.split("/")[-1]
    descriptions = reasoner.data_property_values(image, has_description)
    # Descriptions are numbered from 1 in the order the reasoner yields them.
    for desc_counter, description in enumerate(descriptions, start=1):
        embeddings = model.encode(description.get_literal())
        embeddings_final[f"{image_name}_{desc_counter}"] = embeddings.tolist()

# NOTE(review): the output name has no extension and ends in "sss" --
# presumably intentional; confirm before renaming, consumers may depend on it.
with open("../fashionpedia-embeddingsss", 'w', encoding="utf-8") as f:
    json.dump(embeddings_final, f)

0 comments on commit 13b90f5

Please sign in to comment.