From d9a13230799e3a54d57b7402b034ea56fcc3acf5 Mon Sep 17 00:00:00 2001 From: Geir Arne Hjelle Date: Tue, 7 Nov 2023 16:45:30 +0100 Subject: [PATCH] Final QA (#457) --- embeddings-and-vector-databases-with-chromadb/README.md | 2 +- embeddings-and-vector-databases-with-chromadb/car_data_etl.py | 4 +--- embeddings-and-vector-databases-with-chromadb/chroma_utils.py | 4 ++-- .../cosine_similarity.py | 2 +- .../intro_to_vectors.py | 2 +- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/embeddings-and-vector-databases-with-chromadb/README.md b/embeddings-and-vector-databases-with-chromadb/README.md index 429fc348dc..ee0d1636d0 100644 --- a/embeddings-and-vector-databases-with-chromadb/README.md +++ b/embeddings-and-vector-databases-with-chromadb/README.md @@ -1,6 +1,6 @@ # Embeddings and Vector Databases With ChromaDB -Supporting code for the Real Python tutorial [Embeddings and Vector Databases With ChromaDB](https://realpython.com/embeddings-and-vector-databases-with-chromadb/). +Supporting code for the Real Python tutorial [Embeddings and Vector Databases With ChromaDB](https://realpython.com/chromadb-vector-database/). To run the code in this tutorial, you should have `numpy`, `spacy`, `sentence-transformers`, `chromadb`, `polars`, `more-itertools`, and `openai` installed in your environment. diff --git a/embeddings-and-vector-databases-with-chromadb/car_data_etl.py b/embeddings-and-vector-databases-with-chromadb/car_data_etl.py index 1026bc9f2d..3f8fdb171b 100644 --- a/embeddings-and-vector-databases-with-chromadb/car_data_etl.py +++ b/embeddings-and-vector-databases-with-chromadb/car_data_etl.py @@ -57,6 +57,4 @@ def prepare_car_reviews_data( documents = car_review_db_data["Review"].to_list() metadatas = car_review_db_data.drop("Review").to_dicts() - chroma_data = {"ids": ids, "documents": documents, "metadatas": metadatas} - - return chroma_data + return {"ids": ids, "documents": documents, "metadatas": metadatas} diff --git a/embeddings-and-vector-databases-with-chromadb/chroma_utils.py b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py index b45f12c934..253c191a1f 100644 --- a/embeddings-and-vector-databases-with-chromadb/chroma_utils.py +++ b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py @@ -8,7 +8,7 @@ def build_chroma_collection( chroma_path: pathlib.Path, collection_name: str, - embbeding_func_name: str, + embedding_func_name: str, ids: list[str], documents: list[str], metadatas: list[dict], @@ -19,7 +19,7 @@ def build_chroma_collection( chroma_client = chromadb.PersistentClient(chroma_path) embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction( - model_name=embbeding_func_name + model_name=embedding_func_name ) collection = chroma_client.create_collection( diff --git a/embeddings-and-vector-databases-with-chromadb/cosine_similarity.py b/embeddings-and-vector-databases-with-chromadb/cosine_similarity.py index cb3a416edd..bc642bf95b 100644 --- a/embeddings-and-vector-databases-with-chromadb/cosine_similarity.py +++ b/embeddings-and-vector-databases-with-chromadb/cosine_similarity.py @@ -4,4 +4,4 @@ def compute_cosine_similarity(u: np.ndarray, v: np.ndarray) -> float: """Compute the cosine similarity between two vectors""" - return u.dot(v) / (np.linalg.norm(u) * np.linalg.norm(v)) + return (u @ v) / (np.linalg.norm(u) * np.linalg.norm(v)) diff --git a/embeddings-and-vector-databases-with-chromadb/intro_to_vectors.py b/embeddings-and-vector-databases-with-chromadb/intro_to_vectors.py index 2659a23aeb..9d1aad61f3 100644 --- a/embeddings-and-vector-databases-with-chromadb/intro_to_vectors.py +++ b/embeddings-and-vector-databases-with-chromadb/intro_to_vectors.py @@ -20,4 +20,4 @@ # Dot product print(np.sum(v1 * v2)) -print(v1.dot(v3)) +print(v1 @ v3)