From ed8d2b0c69ec17700a307376d2c52df292cabbea Mon Sep 17 00:00:00 2001
From: hfhoffman1144 <hfh160130@utdallas.edu>
Date: Thu, 19 Oct 2023 08:21:46 -0500
Subject: [PATCH 1/5] first materials commit for chromadb article

---
 embeddings_chromadb/README.md                 |   7 ++
 embeddings_chromadb/car_data_etl.py           |  61 ++++++++++
 embeddings_chromadb/chroma_utils.py           |  40 +++++++
 embeddings_chromadb/config.json               |   3 +
 embeddings_chromadb/cosine_similarity.py      |   7 ++
 .../create_car_review_collection.py           |  38 ++++++
 embeddings_chromadb/intro_to_vectors.py       |  23 ++++
 embeddings_chromadb/llm_car_review_context.py | 113 ++++++++++++++++++
 embeddings_chromadb/text_embeddings.py        |  40 +++++++
 embeddings_chromadb/word_vectors.py           |  34 ++++++
 10 files changed, 366 insertions(+)
 create mode 100644 embeddings_chromadb/README.md
 create mode 100644 embeddings_chromadb/car_data_etl.py
 create mode 100644 embeddings_chromadb/chroma_utils.py
 create mode 100644 embeddings_chromadb/config.json
 create mode 100644 embeddings_chromadb/cosine_similarity.py
 create mode 100644 embeddings_chromadb/create_car_review_collection.py
 create mode 100644 embeddings_chromadb/intro_to_vectors.py
 create mode 100644 embeddings_chromadb/llm_car_review_context.py
 create mode 100644 embeddings_chromadb/text_embeddings.py
 create mode 100644 embeddings_chromadb/word_vectors.py

diff --git a/embeddings_chromadb/README.md b/embeddings_chromadb/README.md
new file mode 100644
index 0000000000..6f116901b0
--- /dev/null
+++ b/embeddings_chromadb/README.md
@@ -0,0 +1,7 @@
+# Embeddings and Vector Databases With ChromaDB
+
+Supporting code for the Real Python tutorial [Embeddings and Vector Databases With ChromaDB](https://realpython.com/embeddings-and-vector-databases-with-chromadb/). 
+
+To run the code in this tutorial, you should have `numpy`, `spacy`, `sentence-transformers`, `chromadb`, `polars`, `more-itertools`,  and `openai` installed in your environment. 
+
+
diff --git a/embeddings_chromadb/car_data_etl.py b/embeddings_chromadb/car_data_etl.py
new file mode 100644
index 0000000000..b454a4e19b
--- /dev/null
+++ b/embeddings_chromadb/car_data_etl.py
@@ -0,0 +1,61 @@
+import pathlib
+import polars as pl
+
+
+def prepare_car_reviews_data(
+    data_path: pathlib.Path, vehicle_years: list[int] = [2017]
+):
+    """Prepare the car reviews dataset for ChromaDB"""
+
+    # Define the schema to ensure proper data types are enforced
+    dtypes = {
+        "": pl.Int64,
+        "Review_Date": pl.Utf8,
+        "Author_Name": pl.Utf8,
+        "Vehicle_Title": pl.Utf8,
+        "Review_Title": pl.Utf8,
+        "Review": pl.Utf8,
+        "Rating": pl.Float64,
+    }
+
+    # Scan the car reviews dataset(s)
+    car_reviews = pl.scan_csv(data_path, dtypes=dtypes)
+
+    # Extract the vehicle title and year as new columns
+    # Filter on selected years
+    car_review_db_data = (
+        car_reviews.with_columns(
+            [
+                (
+                    pl.col("Vehicle_Title")
+                    .str.split(by=" ")
+                    .list.get(0)
+                    .cast(pl.Int64)
+                ).alias("Vehicle_Year"),
+                (pl.col("Vehicle_Title").str.split(by=" ").list.get(1)).alias(
+                    "Vehicle_Model"
+                ),
+            ]
+        )
+        .filter(pl.col("Vehicle_Year").is_in(vehicle_years))
+        .select(
+            [
+                "Review_Title",
+                "Review",
+                "Rating",
+                "Vehicle_Year",
+                "Vehicle_Model",
+            ]
+        )
+        .sort(["Vehicle_Model", "Rating"])
+        .collect()
+    )
+
+    # Create ids, documents, and metadatas data in the format chromadb expects
+    ids = [f"review{i}" for i in range(car_review_db_data.shape[0])]
+    documents = car_review_db_data["Review"].to_list()
+    metadatas = car_review_db_data.drop("Review").to_dicts()
+
+    chroma_data = {"ids": ids, "documents": documents, "metadatas": metadatas}
+
+    return chroma_data
diff --git a/embeddings_chromadb/chroma_utils.py b/embeddings_chromadb/chroma_utils.py
new file mode 100644
index 0000000000..e1afd386c6
--- /dev/null
+++ b/embeddings_chromadb/chroma_utils.py
@@ -0,0 +1,40 @@
+import pathlib
+import chromadb
+from chromadb.utils import embedding_functions
+from more_itertools import batched
+
+
+def build_chroma_collection(
+    chroma_path: pathlib.Path,
+    collection_name: str,
+    embbeding_func_name: str,
+    ids: list[str],
+    documents: list[str],
+    metadatas: list[dict],
+    distance_func_name: str = "cosine",
+):
+    """Create a ChromaDB collection and add the documents to it in batches"""
+
+    chroma_client = chromadb.PersistentClient(chroma_path)
+
+    embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
+        model_name=embbeding_func_name
+    )
+
+    collection = chroma_client.create_collection(
+        name=collection_name,
+        embedding_function=embedding_func,
+        metadata={"hnsw:space": distance_func_name},
+    )
+
+    document_indicies = list(range(len(documents)))
+
+    for batch in batched(document_indicies, 166):
+        start_idx = batch[0]
+        end_idx = batch[-1]
+
+        collection.add(  # end_idx is inclusive; slice ends are exclusive
+            ids=ids[start_idx : end_idx + 1],
+            documents=documents[start_idx : end_idx + 1],
+            metadatas=metadatas[start_idx : end_idx + 1],
+        )
diff --git a/embeddings_chromadb/config.json b/embeddings_chromadb/config.json
new file mode 100644
index 0000000000..a395e21fcf
--- /dev/null
+++ b/embeddings_chromadb/config.json
@@ -0,0 +1,3 @@
+{
+    "openai-secret-key": "your-api-key"
+}
\ No newline at end of file
diff --git a/embeddings_chromadb/cosine_similarity.py b/embeddings_chromadb/cosine_similarity.py
new file mode 100644
index 0000000000..cb3a416edd
--- /dev/null
+++ b/embeddings_chromadb/cosine_similarity.py
@@ -0,0 +1,7 @@
+import numpy as np
+
+
+def compute_cosine_similarity(u: np.ndarray, v: np.ndarray) -> float:
+    """Compute the cosine similarity between two vectors"""
+
+    return u.dot(v) / (np.linalg.norm(u) * np.linalg.norm(v))
diff --git a/embeddings_chromadb/create_car_review_collection.py b/embeddings_chromadb/create_car_review_collection.py
new file mode 100644
index 0000000000..1e7c4643fb
--- /dev/null
+++ b/embeddings_chromadb/create_car_review_collection.py
@@ -0,0 +1,38 @@
+import chromadb
+from chromadb.utils import embedding_functions
+from car_data_etl import prepare_car_reviews_data
+from chroma_utils import build_chroma_collection
+
+DATA_PATH = "data/archive/*"
+CHROMA_PATH = "car_review_embeddings"
+EMBEDDING_FUNC_NAME = "multi-qa-MiniLM-L6-cos-v1"
+COLLECTION_NAME = "car_reviews"
+
+chroma_car_reviews_dict = prepare_car_reviews_data(DATA_PATH)
+
+build_chroma_collection(
+    CHROMA_PATH,
+    COLLECTION_NAME,
+    EMBEDDING_FUNC_NAME,
+    chroma_car_reviews_dict["ids"],
+    chroma_car_reviews_dict["documents"],
+    chroma_car_reviews_dict["metadatas"],
+)
+
+client = chromadb.PersistentClient(CHROMA_PATH)
+embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
+    model_name=EMBEDDING_FUNC_NAME
+)
+collection = client.get_collection(
+    name=COLLECTION_NAME, embedding_function=embedding_func
+)
+
+great_reviews = collection.query(
+    query_texts=[
+        "Find me some positive reviews that discuss the car's performance"
+    ],
+    n_results=5,
+    include=["documents", "distances", "metadatas"],
+)
+
+print(great_reviews["documents"][0][0])
diff --git a/embeddings_chromadb/intro_to_vectors.py b/embeddings_chromadb/intro_to_vectors.py
new file mode 100644
index 0000000000..2659a23aeb
--- /dev/null
+++ b/embeddings_chromadb/intro_to_vectors.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+# Create vectors with NumPy
+vector1 = np.array([1, 0])
+vector2 = np.array([0, 1])
+print(vector1)
+print(vector2)
+
+v1 = np.array([1, 0])
+v2 = np.array([0, 1])
+v3 = np.array([np.sqrt(2), np.sqrt(2)])
+
+# Dimension
+print(v1.shape)
+
+# Magnitude
+print(np.sqrt(np.sum(v1**2)))
+print(np.linalg.norm(v1))
+print(np.linalg.norm(v3))
+
+# Dot product
+print(np.sum(v1 * v2))
+print(v1.dot(v3))
diff --git a/embeddings_chromadb/llm_car_review_context.py b/embeddings_chromadb/llm_car_review_context.py
new file mode 100644
index 0000000000..f648a9940e
--- /dev/null
+++ b/embeddings_chromadb/llm_car_review_context.py
@@ -0,0 +1,113 @@
+import os
+import json
+import openai
+import chromadb
+from chromadb.utils import embedding_functions
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+# Same paths and names as in create_car_review_collection.py
+DATA_PATH = "data/archive/*"
+CHROMA_PATH = "car_review_embeddings"
+EMBEDDING_FUNC_NAME = "multi-qa-MiniLM-L6-cos-v1"
+COLLECTION_NAME = "car_reviews"
+
+# Read the OpenAI API key from the local config file
+with open("config.json", "r") as json_file:
+    config_data = json.load(json_file)
+
+openai.api_key = config_data.get("openai-secret-key")
+
+# Open the collection built by create_car_review_collection.py
+client = chromadb.PersistentClient(CHROMA_PATH)
+embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
+    model_name=EMBEDDING_FUNC_NAME
+)
+
+collection = client.get_collection(
+    name=COLLECTION_NAME, embedding_function=embedding_func
+)
+
+# System prompt template; {} is replaced by the retrieved reviews
+context = """
+ You are a customer success employee at a large
+  car dealership. Use the following car reviews
+  to answer questions: {}
+ """
+
+# User question; it also serves as the query text for the review search
+question = """
+ What's the key to great customer satisfaction
+  based on detailed positive reviews?
+ """
+
+# Retrieve the 10 closest reviews that have a rating of 3 or higher
+good_reviews = collection.query(
+    query_texts=[question],
+    n_results=10,
+    include=["documents"],
+    where={"Rating": {"$gte": 3}},
+)
+
+# Combine the matching reviews into one comma-separated string
+reviews_str = ",".join(good_reviews["documents"][0])
+
+print("Good reviews: ")
+print(reviews_str)
+print("###########################################")
+
+# Send a single request: the reviews go in the system message and the
+# question in the user message
+good_review_summaries = openai.ChatCompletion.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "system", "content": context.format(reviews_str)},
+        {"role": "user", "content": question},
+    ],
+    temperature=0,
+    n=1,
+)
+
+print("AI-Generated summary of good reviews: ")
+print(good_review_summaries["choices"][0]["message"]["content"])
+print("###########################################")
+
+
+# Repeat the same steps for reviews that have a rating of 3 or lower
+context = """
+          You are a customer success employee at a large car dealership.
+          Use the following car reviews to answer questions: {}
+          """
+question = """
+            Which of these poor reviews has the worst implications about
+            our dealership? Explain why.
+            """
+
+poor_reviews = collection.query(
+    query_texts=[question],
+    n_results=5,
+    include=["documents"],
+    where={"Rating": {"$lte": 3}},
+)
+
+# All retrieved reviews are passed to the model, not only the one printed
+reviews_str = ",".join(poor_reviews["documents"][0])
+
+print("Worst reviews: ")
+print(poor_reviews["documents"][0][0])
+print("###########################################")
+
+# Ask the model to pick out and explain the most damaging review
+poor_review_analysis = openai.ChatCompletion.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "system", "content": context.format(reviews_str)},
+        {"role": "user", "content": question},
+    ],
+    temperature=0,
+    n=1,
+)
+
+print("AI-Generated summary of the single worst review: ")
+print(poor_review_analysis["choices"][0]["message"]["content"])
+print("###########################################")
diff --git a/embeddings_chromadb/text_embeddings.py b/embeddings_chromadb/text_embeddings.py
new file mode 100644
index 0000000000..073416f357
--- /dev/null
+++ b/embeddings_chromadb/text_embeddings.py
@@ -0,0 +1,40 @@
+from sentence_transformers import SentenceTransformer
+from cosine_similarity import compute_cosine_similarity
+
+model = SentenceTransformer("all-MiniLM-L6-v2")
+texts = [
+    "The canine barked loudly.",
+    "The dog made a noisy bark.",
+    "He ate a lot of pizza.",
+    "He devoured a large quantity of pizza pie.",
+]
+
+text_embeddings = model.encode(texts)
+
+print(type(text_embeddings))
+
+print(text_embeddings.shape)
+
+text_embeddings_dict = dict(zip(texts, list(text_embeddings)))
+
+dog_text_1 = "The canine barked loudly."
+dog_text_2 = "The dog made a noisy bark."
+print(
+    compute_cosine_similarity(
+        text_embeddings_dict[dog_text_1], text_embeddings_dict[dog_text_2]
+    )
+)
+
+pizza_text_1 = "He ate a lot of pizza."
+pizza_text_2 = "He devoured a large quantity of pizza pie."
+print(
+    compute_cosine_similarity(
+        text_embeddings_dict[pizza_text_1], text_embeddings_dict[pizza_text_2]
+    )
+)
+
+print(
+    compute_cosine_similarity(
+        text_embeddings_dict[dog_text_1], text_embeddings_dict[pizza_text_1]
+    )
+)
diff --git a/embeddings_chromadb/word_vectors.py b/embeddings_chromadb/word_vectors.py
new file mode 100644
index 0000000000..f3ddb9e2c9
--- /dev/null
+++ b/embeddings_chromadb/word_vectors.py
@@ -0,0 +1,34 @@
+import spacy
+from cosine_similarity import compute_cosine_similarity
+
+# Load the medium-size English model
+nlp = spacy.load("en_core_web_md")
+
+# Get the word vector for the word "dog"
+dog_embedding = nlp.vocab["dog"].vector
+
+# Word vectors are stored as NumPy arrays
+print(type(dog_embedding))
+
+# Word vector dimension
+print(dog_embedding.shape)
+
+# First 10 elements of the "dog" word vector
+print(dog_embedding[0:10])
+
+dog_embedding = nlp.vocab["dog"].vector
+cat_embedding = nlp.vocab["cat"].vector
+apple_embedding = nlp.vocab["apple"].vector
+tasty_embedding = nlp.vocab["tasty"].vector
+delicious_embedding = nlp.vocab["delicious"].vector
+truck_embedding = nlp.vocab["truck"].vector
+
+print(compute_cosine_similarity(dog_embedding, cat_embedding))
+
+print(compute_cosine_similarity(delicious_embedding, tasty_embedding))
+
+print(compute_cosine_similarity(apple_embedding, delicious_embedding))
+
+print(compute_cosine_similarity(dog_embedding, apple_embedding))
+
+print(compute_cosine_similarity(truck_embedding, delicious_embedding))

From 76c848a09f64e77e9bd5dc8d273e333a4918be53 Mon Sep 17 00:00:00 2001
From: hfhoffman1144 <hfh160130@utdallas.edu>
Date: Thu, 19 Oct 2023 08:23:49 -0500
Subject: [PATCH 2/5] renamed folder to slug

---
 .../README.md                                                     | 0
 .../car_data_etl.py                                               | 0
 .../chroma_utils.py                                               | 0
 .../config.json                                                   | 0
 .../cosine_similarity.py                                          | 0
 .../create_car_review_collection.py                               | 0
 .../intro_to_vectors.py                                           | 0
 .../llm_car_review_context.py                                     | 0
 .../text_embeddings.py                                            | 0
 .../word_vectors.py                                               | 0
 10 files changed, 0 insertions(+), 0 deletions(-)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/README.md (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/car_data_etl.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/chroma_utils.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/config.json (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/cosine_similarity.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/create_car_review_collection.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/intro_to_vectors.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/llm_car_review_context.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/text_embeddings.py (100%)
 rename {embeddings_chromadb => embeddings-and-vector-databases-with-chromadb}/word_vectors.py (100%)

diff --git a/embeddings_chromadb/README.md b/embeddings-and-vector-databases-with-chromadb/README.md
similarity index 100%
rename from embeddings_chromadb/README.md
rename to embeddings-and-vector-databases-with-chromadb/README.md
diff --git a/embeddings_chromadb/car_data_etl.py b/embeddings-and-vector-databases-with-chromadb/car_data_etl.py
similarity index 100%
rename from embeddings_chromadb/car_data_etl.py
rename to embeddings-and-vector-databases-with-chromadb/car_data_etl.py
diff --git a/embeddings_chromadb/chroma_utils.py b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
similarity index 100%
rename from embeddings_chromadb/chroma_utils.py
rename to embeddings-and-vector-databases-with-chromadb/chroma_utils.py
diff --git a/embeddings_chromadb/config.json b/embeddings-and-vector-databases-with-chromadb/config.json
similarity index 100%
rename from embeddings_chromadb/config.json
rename to embeddings-and-vector-databases-with-chromadb/config.json
diff --git a/embeddings_chromadb/cosine_similarity.py b/embeddings-and-vector-databases-with-chromadb/cosine_similarity.py
similarity index 100%
rename from embeddings_chromadb/cosine_similarity.py
rename to embeddings-and-vector-databases-with-chromadb/cosine_similarity.py
diff --git a/embeddings_chromadb/create_car_review_collection.py b/embeddings-and-vector-databases-with-chromadb/create_car_review_collection.py
similarity index 100%
rename from embeddings_chromadb/create_car_review_collection.py
rename to embeddings-and-vector-databases-with-chromadb/create_car_review_collection.py
diff --git a/embeddings_chromadb/intro_to_vectors.py b/embeddings-and-vector-databases-with-chromadb/intro_to_vectors.py
similarity index 100%
rename from embeddings_chromadb/intro_to_vectors.py
rename to embeddings-and-vector-databases-with-chromadb/intro_to_vectors.py
diff --git a/embeddings_chromadb/llm_car_review_context.py b/embeddings-and-vector-databases-with-chromadb/llm_car_review_context.py
similarity index 100%
rename from embeddings_chromadb/llm_car_review_context.py
rename to embeddings-and-vector-databases-with-chromadb/llm_car_review_context.py
diff --git a/embeddings_chromadb/text_embeddings.py b/embeddings-and-vector-databases-with-chromadb/text_embeddings.py
similarity index 100%
rename from embeddings_chromadb/text_embeddings.py
rename to embeddings-and-vector-databases-with-chromadb/text_embeddings.py
diff --git a/embeddings_chromadb/word_vectors.py b/embeddings-and-vector-databases-with-chromadb/word_vectors.py
similarity index 100%
rename from embeddings_chromadb/word_vectors.py
rename to embeddings-and-vector-databases-with-chromadb/word_vectors.py

From 5d419cddcb097743632ee9e53b4ba254ad3d8eb3 Mon Sep 17 00:00:00 2001
From: martin-martin <breuss.martin@gmail.com>
Date: Fri, 20 Oct 2023 11:41:33 +0200
Subject: [PATCH 3/5] Add requirements file

---
 .../README.md                                 |  4 +
 .../requirements.txt                          | 92 +++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 embeddings-and-vector-databases-with-chromadb/requirements.txt

diff --git a/embeddings-and-vector-databases-with-chromadb/README.md b/embeddings-and-vector-databases-with-chromadb/README.md
index 6f116901b0..429fc348dc 100644
--- a/embeddings-and-vector-databases-with-chromadb/README.md
+++ b/embeddings-and-vector-databases-with-chromadb/README.md
@@ -4,4 +4,8 @@ Supporting code for the Real Python tutorial [Embeddings and Vector Databases Wi
 
 To run the code in this tutorial, you should have `numpy`, `spacy`, `sentence-transformers`, `chromadb`, `polars`, `more-itertools`,  and `openai` installed in your environment. 
 
+You can install the dependencies manually, or by running:
 
+```
+(venv) $ python -m pip install -r requirements.txt
+```
diff --git a/embeddings-and-vector-databases-with-chromadb/requirements.txt b/embeddings-and-vector-databases-with-chromadb/requirements.txt
new file mode 100644
index 0000000000..48d2e1ed1c
--- /dev/null
+++ b/embeddings-and-vector-databases-with-chromadb/requirements.txt
@@ -0,0 +1,92 @@
+aiohttp==3.8.6
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==3.7.1
+async-timeout==4.0.3
+attrs==23.1.0
+backoff==2.2.1
+bcrypt==4.0.1
+blis==0.7.11
+catalogue==2.0.10
+certifi==2023.7.22
+charset-normalizer==3.3.0
+chroma-hnswlib==0.7.3
+chromadb==0.4.14
+click==8.1.7
+cloudpathlib==0.16.0
+coloredlogs==15.0.1
+confection==0.1.3
+cymem==2.0.8
+fastapi==0.104.0
+filelock==3.12.4
+flatbuffers==23.5.26
+frozenlist==1.4.0
+fsspec==2023.9.2
+grpcio==1.59.0
+h11==0.14.0
+httptools==0.6.1
+huggingface-hub==0.17.3
+humanfriendly==10.0
+idna==3.4
+importlib-resources==6.1.0
+Jinja2==3.1.2
+joblib==1.3.2
+langcodes==3.3.0
+MarkupSafe==2.1.3
+monotonic==1.6
+more-itertools==10.1.0
+mpmath==1.3.0
+multidict==6.0.4
+murmurhash==1.0.10
+networkx==3.2
+nltk==3.8.1
+numpy==1.26.1
+onnxruntime==1.16.1
+openai==0.28.1
+overrides==7.4.0
+packaging==23.2
+Pillow==10.1.0
+polars==0.19.9
+posthog==3.0.2
+preshed==3.0.9
+protobuf==4.24.4
+pulsar-client==3.3.0
+pydantic==2.4.2
+pydantic_core==2.10.1
+PyPika==0.48.9
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+PyYAML==6.0.1
+regex==2023.10.3
+requests==2.31.0
+safetensors==0.4.0
+scikit-learn==1.3.1
+scipy==1.11.3
+sentence-transformers==2.2.2
+sentencepiece==0.1.99
+six==1.16.0
+smart-open==6.4.0
+sniffio==1.3.0
+spacy==3.7.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+srsly==2.4.8
+starlette==0.27.0
+sympy==1.12
+thinc==8.2.1
+threadpoolctl==3.2.0
+tokenizers==0.14.1
+torch==2.1.0
+torchvision==0.16.0
+tqdm==4.66.1
+transformers==4.34.1
+typer==0.9.0
+typing_extensions==4.8.0
+urllib3==2.0.7
+uvicorn==0.23.2
+uvloop==0.18.0
+wasabi==1.1.2
+watchfiles==0.21.0
+weasel==0.3.3
+websockets==11.0.3
+yarl==1.9.2

From a33d8d9c6d4f863c72db7b15a2293ae00bb79439 Mon Sep 17 00:00:00 2001
From: martin-martin <breuss.martin@gmail.com>
Date: Fri, 20 Oct 2023 11:42:13 +0200
Subject: [PATCH 4/5] Apply isort formatting

---
 .../car_data_etl.py                                          | 1 +
 .../chroma_utils.py                                          | 1 +
 .../create_car_review_collection.py                          | 1 +
 .../llm_car_review_context.py                                | 5 +++--
 .../text_embeddings.py                                       | 1 +
 .../word_vectors.py                                          | 1 +
 6 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/embeddings-and-vector-databases-with-chromadb/car_data_etl.py b/embeddings-and-vector-databases-with-chromadb/car_data_etl.py
index b454a4e19b..1026bc9f2d 100644
--- a/embeddings-and-vector-databases-with-chromadb/car_data_etl.py
+++ b/embeddings-and-vector-databases-with-chromadb/car_data_etl.py
@@ -1,4 +1,5 @@
 import pathlib
+
 import polars as pl
 
 
diff --git a/embeddings-and-vector-databases-with-chromadb/chroma_utils.py b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
index e1afd386c6..e565177743 100644
--- a/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
+++ b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
@@ -1,4 +1,5 @@
 import pathlib
+
 import chromadb
 from chromadb.utils import embedding_functions
 from more_itertools import batched
diff --git a/embeddings-and-vector-databases-with-chromadb/create_car_review_collection.py b/embeddings-and-vector-databases-with-chromadb/create_car_review_collection.py
index 1e7c4643fb..929c9a2522 100644
--- a/embeddings-and-vector-databases-with-chromadb/create_car_review_collection.py
+++ b/embeddings-and-vector-databases-with-chromadb/create_car_review_collection.py
@@ -1,5 +1,6 @@
 import chromadb
 from chromadb.utils import embedding_functions
+
 from car_data_etl import prepare_car_reviews_data
 from chroma_utils import build_chroma_collection
 
diff --git a/embeddings-and-vector-databases-with-chromadb/llm_car_review_context.py b/embeddings-and-vector-databases-with-chromadb/llm_car_review_context.py
index f648a9940e..cc9bff4112 100644
--- a/embeddings-and-vector-databases-with-chromadb/llm_car_review_context.py
+++ b/embeddings-and-vector-databases-with-chromadb/llm_car_review_context.py
@@ -1,7 +1,8 @@
-import os
 import json
-import openai
+import os
+
 import chromadb
+import openai
 from chromadb.utils import embedding_functions
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
diff --git a/embeddings-and-vector-databases-with-chromadb/text_embeddings.py b/embeddings-and-vector-databases-with-chromadb/text_embeddings.py
index 073416f357..adce790e63 100644
--- a/embeddings-and-vector-databases-with-chromadb/text_embeddings.py
+++ b/embeddings-and-vector-databases-with-chromadb/text_embeddings.py
@@ -1,4 +1,5 @@
 from sentence_transformers import SentenceTransformer
+
 from cosine_similarity import compute_cosine_similarity
 
 model = SentenceTransformer("all-MiniLM-L6-v2")
diff --git a/embeddings-and-vector-databases-with-chromadb/word_vectors.py b/embeddings-and-vector-databases-with-chromadb/word_vectors.py
index f3ddb9e2c9..ccaed6e0a5 100644
--- a/embeddings-and-vector-databases-with-chromadb/word_vectors.py
+++ b/embeddings-and-vector-databases-with-chromadb/word_vectors.py
@@ -1,4 +1,5 @@
 import spacy
+
 from cosine_similarity import compute_cosine_similarity
 
 # Load the medium-size English model

From 393b2c0517085fbbc6279b7e8bd764aada20731c Mon Sep 17 00:00:00 2001
From: KateFinegan <95366190+KateFinegan@users.noreply.github.com>
Date: Tue, 31 Oct 2023 19:15:42 -0600
Subject: [PATCH 5/5] Language edit

---
 embeddings-and-vector-databases-with-chromadb/chroma_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/embeddings-and-vector-databases-with-chromadb/chroma_utils.py b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
index e565177743..b45f12c934 100644
--- a/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
+++ b/embeddings-and-vector-databases-with-chromadb/chroma_utils.py
@@ -28,9 +28,9 @@ def build_chroma_collection(
         metadata={"hnsw:space": distance_func_name},
     )
 
-    document_indicies = list(range(len(documents)))
+    document_indices = list(range(len(documents)))
 
-    for batch in batched(document_indicies, 166):
+    for batch in batched(document_indices, 166):
         start_idx = batch[0]
         end_idx = batch[-1]