From 6d718c17d356a943a1e443c3a5d7d910843791c5 Mon Sep 17 00:00:00 2001 From: Farzad Date: Sat, 4 Jan 2025 17:31:05 -0600 Subject: [PATCH 1/9] docling azure ai search --- docs/examples/rag_azuresearch.ipynb | 749 ++++++++++++++++++++++++++++ 1 file changed, 749 insertions(+) create mode 100644 docs/examples/rag_azuresearch.ipynb diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb new file mode 100644 index 00000000..3f441088 --- /dev/null +++ b/docs/examples/rag_azuresearch.ipynb @@ -0,0 +1,749 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ag9kcX2B_atc" + }, + "source": [ + "# RAG using Docling + Azure AI Search + Azure OpenAI\n", + "\n", + "This is a code recipe that uses [Azure AI Search](https://azure.microsoft.com/en-us/products/ai-services/ai-search/?msockid=0109678bea39665431e37323ebff6723) to perform RAG over PDF documents parsed by [Docling](https://ds4sd.github.io/docling/).\n", + "\n", + "# Description:\n", + "\n", + "1. Parse and chunk \"State of AI\" PPTX from Google Slides using Docling\n", + "2. Use Azure OpenAI embeddings for vector creation\n", + "3. Insert vector data into Azure AI Search\n", + "4. 
# --- GPU / MPS availability check -------------------------------------------
# Docling's PDF conversion benefits greatly from hardware acceleration; this
# cell fails fast if neither CUDA nor Apple MPS is available.
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"CUDA GPU is enabled: {torch.cuda.get_device_name(0)}")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print("MPS GPU is enabled.")
else:
    raise EnvironmentError(
        "No GPU or MPS device found. Please check your environment and ensure GPU or MPS support is configured."
    )

# --- Part 1: Imports & configuration -----------------------------------------
# FIX: the original cell imported several modules two or three times each
# (os, openai, rich, SearchClient, VectorizableTextQuery, DefaultAzureCredential,
# and the azure.search.documents.indexes.models list). Duplicates are removed
# and imports are grouped stdlib -> third-party, per convention.
import os

import openai
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    HnswAlgorithmConfiguration,
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchProfile,
)
from azure.search.documents.models import VectorizableTextQuery
from dotenv import load_dotenv
from rich.console import Console
from rich.panel import Panel

# Load settings from a local .env file, if present.
load_dotenv()

# Azure AI Search + Azure OpenAI settings, supplied via environment variables.
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
AZURE_SEARCH_KEY = os.getenv("AZURE_SEARCH_KEY")
AZURE_SEARCH_INDEX_NAME = os.getenv("AZURE_SEARCH_INDEX_NAME")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_CHAT_MODEL = os.getenv("AZURE_OPENAI_CHAT_MODEL")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_EMBEDDINGS = os.getenv("AZURE_OPENAI_EMBEDDINGS")

# Shared rich console used by all later cells for pretty output.
console = Console()
# Part 2: Parse the "State of AI Report 2024" PDF with Docling and display a
# short Markdown preview to confirm the conversion succeeded.
# NOTE: `result` is reused by the chunking cell below — keep the name.
from docling.document_converter import DocumentConverter

source_url = "https://ignite2024demo.blob.core.windows.net/state-of-ai-2024/State of AI Report 2024.pdf"

doc_converter = DocumentConverter()
result = doc_converter.convert(source_url)

# Show only the first 500 characters of the exported Markdown as a sanity check.
preview_text = result.document.export_to_markdown()[:500] + "..."
console.print(Panel(preview_text, title="Docling Markdown Preview"))
# Part 3: Hierarchical chunking of the parsed document.
# Each chunk is paired with a synthetic id; `all_chunks` is consumed by the
# embedding/upload cell below.
from docling_core.transforms.chunker import HierarchicalChunker

chunker = HierarchicalChunker()

# Chunk the single `result.document` produced by the conversion cell.
doc_chunks = list(chunker.chunk(result.document))

# (chunk_id, chunk_text) pairs; optionally a doc/page title could be prefixed.
all_chunks = [(f"chunk_{idx}", chunk.text) for idx, chunk in enumerate(doc_chunks)]

console.print(f"Total chunks from PDF: {len(all_chunks)}")
VECTOR_DIM = 1536  # Must match the embedding model (1536 for text-embedding-3-small)

index_client = SearchIndexClient(
    AZURE_SEARCH_ENDPOINT, AzureKeyCredential(AZURE_SEARCH_KEY)
)


def create_search_index(index_name: str) -> None:
    """Create (or recreate) an Azure AI Search index for the chunk embeddings.

    The index has three fields:
      - chunk_id       : string key
      - content        : searchable chunk text
      - content_vector : VECTOR_DIM-dimensional vector, HNSW-indexed, with an
                         integrated Azure OpenAI vectorizer for query-time
                         embedding of text queries.

    Args:
        index_name: Name of the index to create in the search service.
    """
    fields = [
        SimpleField(name="chunk_id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SearchField(
            name="content_vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            filterable=False,
            sortable=False,
            facetable=False,
            vector_search_dimensions=VECTOR_DIM,
            vector_search_profile_name="default",
        ),
    ]

    vector_search = VectorSearch(
        algorithms=[HnswAlgorithmConfiguration(name="default")],
        profiles=[
            VectorSearchProfile(
                name="default",
                algorithm_configuration_name="default",
                vectorizer_name="default",
            )
        ],
        # Integrated vectorization: the service embeds query text itself via
        # the configured Azure OpenAI deployment (used by VectorizableTextQuery).
        vectorizers=[
            AzureOpenAIVectorizer(
                vectorizer_name="default",
                parameters=AzureOpenAIVectorizerParameters(
                    resource_url=AZURE_OPENAI_ENDPOINT,
                    deployment_name=AZURE_OPENAI_EMBEDDINGS,
                    model_name="text-embedding-3-small",
                    api_key=AZURE_OPENAI_API_KEY,
                ),
            )
        ],
    )

    new_index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)

    # Drop any existing index of the same name so the schema is fresh.
    # FIX: a bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to Exception (the expected failure is "index does not exist").
    try:
        index_client.delete_index(index_name)
    except Exception:
        pass

    index_client.create_or_update_index(new_index)
    console.print(f"Index '{index_name}' created.")


create_search_index(AZURE_SEARCH_INDEX_NAME)
# Part 4B: Generate embeddings with Azure OpenAI and upsert chunks into the
# Azure AI Search index in batches.
# FIX: `import uuid` was buried mid-cell (moved up with the other imports),
# the unused `chunk_id` loop variable is now `_`, and the per-batch success
# check inspects every IndexingResult instead of only the first one.
import uuid

from openai import AzureOpenAI

search_client = SearchClient(
    AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_INDEX_NAME, AzureKeyCredential(AZURE_SEARCH_KEY)
)
openai_client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
)


def embed_text(text: str) -> list:
    """Embed a single text string with the configured Azure OpenAI deployment.

    Args:
        text: The text to embed.

    Returns:
        The embedding vector (list of floats) for `text`.
    """
    response = openai_client.embeddings.create(
        input=text, model=AZURE_OPENAI_EMBEDDINGS  # deployment name
    )
    return response.data[0].embedding


# Build the documents to upload; each gets a fresh UUID key.
upload_docs = []
for _, chunk_text in all_chunks:
    upload_docs.append(
        {
            "chunk_id": str(uuid.uuid4()),
            "content": chunk_text,
            "content_vector": embed_text(chunk_text),
        }
    )

# Upload in small batches so each request stays well under service limits.
BATCH_SIZE = 250
for i in range(0, len(upload_docs), BATCH_SIZE):
    subset = upload_docs[i : i + BATCH_SIZE]
    resp = search_client.upload_documents(documents=subset)
    batch_ok = all(r.succeeded for r in resp)
    console.print(
        f"Uploaded batch {i} -> {i+len(subset)}; success: {batch_ok}, status code: {resp[0].status_code}"
    )

console.print("All chunks uploaded to Azure Search.")
╭────────────────────────────────────────────────── RAG Prompt ───────────────────────────────────────────────────╮\n",
+       "│                                                                                                                 │\n",
+       "│ You are an AI assistant helping summarize the State of AI 2024 PDF.                                             │\n",
+       "│ Use ONLY the text below to answer the user's question.                                                          │\n",
+       "│ If the answer isn't in the text, say you don't know.                                                            │\n",
+       "│ Context:                                                                                                        │\n",
+       "│ -NVIDIA remains the most powerful company in the world, enjoying a stint in the $3T club, while regulators      │\n",
+       "│ probe the concentrations of power within GenAI.                                                                 │\n",
+       "│ -More established GenAI companies bring in billions of dollars in revenue, while start-ups begin to gain        │\n",
+       "│ traction in sectors like video and audio generation. Although companies begin to make the journey from model to │\n",
+       "│ product, long-term questions around pricing and sustainability remain unresolved.                               │\n",
+       "│ -Driven by a bull run in public markets, AI companies reach $9T in value, while investment levels grow          │\n",
+       "│ healthily in private companies.                                                                                 │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Driven by GenAI megarounds like xAI and OpenAI's $6B fundraises, US private market continue to lead. Total      │\n",
+       "│ investment into AI companies reached close to $100B.                                                            │\n",
+       "│ ---                                                                                                             │\n",
+       "│ stateof.ai 2024 Sam Altman is reportedly raising huge sums of money to do this, while each of Google, Amazon,   │\n",
+       "│ Meta and Microsoft continue to build and improve their owned AI silicon.                                        │\n",
+       "│ ---                                                                                                             │\n",
+       "│ stateof.ai 2024                                                                                                 │\n",
+       "│ ---                                                                                                             │\n",
+       "│ stateof.ai 2024                                                                                                 │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Of all venture-backed companies, the highest % of AI companies are found in robotics, enterprise software,      │\n",
+       "│ space and security categories.                                                                                  │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Deep learning (DL): an approach to AI inspired by how neurons in the brain recognise complex patterns in data.  │\n",
+       "│ The \"deep\" refers to the many layers of neurons in today's models that help to learn rich representations of    │\n",
+       "│ data to achieve better performance gains.                                                                       │\n",
+       "│ ---                                                                                                             │\n",
+       "│ While private company valuations have continued to climb at a steady pace, a small handful of publicly traded   │\n",
+       "│ companies have held up the market like Atlas. Publics alone now enjoy a greater enterprise value than the       │\n",
+       "│ entire market in 2023.                                                                                          │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Analysis of the 100 highest revenue grossing AI companies using Stripe reveals that, as a group, they are       │\n",
+       "│ generating revenue at a much faster pace than previous waves of equivalently well-performing SaaS companies.    │\n",
+       "│ Strikingly, the average AI company that has reached $30M+ annualised revenue took just 20 months to get there,  │\n",
+       "│ compared to 65 months for equally promising SaaS companies.                                                     │\n",
+       "│ ---                                                                                                             │\n",
+       "│ A generative AI media company is investigated for its misuse during in the 2024 US election circuit.            │\n",
+       "│                                                                                                                 │\n",
+       "│ Question: in 2024, AI companies reached how many $$$ in value?                                                  │\n",
+       "│ Answer:                                                                                                         │\n",
+       "│                                                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31m╭─\u001b[0m\u001b[1;31m─────────────────────────────────────────────────\u001b[0m RAG Prompt \u001b[1;31m──────────────────────────────────────────────────\u001b[0m\u001b[1;31m─╮\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mYou are an AI assistant helping summarize the State of AI 2024 PDF.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mUse ONLY the text below to answer the user's question. \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mIf the answer isn't in the text, say you don't know.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mContext:\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m-NVIDIA remains the most powerful company in the world, enjoying a stint in the $3T club, while regulators \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mprobe the concentrations of power within GenAI.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m-More established GenAI companies bring in billions of dollars in revenue, while start-ups begin to gain \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mtraction in sectors like video and audio generation. 
Although companies begin to make the journey from model to\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mproduct, long-term questions around pricing and sustainability remain unresolved.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m-Driven by a bull run in public markets, AI companies reach $9T in value, while investment levels grow \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mhealthily in private companies.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mDriven by GenAI megarounds like xAI and OpenAI's $6B fundraises, US private market continue to lead. 
Total \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31minvestment into AI companies reached close to $100B.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mstateof.ai 2024 Sam Altman is reportedly raising huge sums of money to do this, while each of Google, Amazon, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mMeta and Microsoft continue to build and improve their owned AI silicon.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mstateof.ai 2024\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mstateof.ai 2024\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mOf all venture-backed companies, the highest % of AI companies are found in robotics, enterprise software, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mspace and security categories.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m 
\u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mDeep learning (DL): an approach to AI inspired by how neurons in the brain recognise complex patterns in data. \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mThe \"deep\" refers to the many layers of neurons in today's models that help to learn rich representations of \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mdata to achieve better performance gains.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mWhile private company valuations have continued to climb at a steady pace, a small handful of publicly traded \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcompanies have held up the market like Atlas. 
Publics alone now enjoy a greater enterprise value than the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mentire market in 2023.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mAnalysis of the 100 highest revenue grossing AI companies using Stripe reveals that, as a group, they are \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mgenerating revenue at a much faster pace than previous waves of equivalently well-performing SaaS companies. \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mStrikingly, the average AI company that has reached $30M+ annualised revenue took just 20 months to get there, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcompared to 65 months for equally promising SaaS companies.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mA generative AI media company is investigated for its misuse during in the 2024 US election circuit.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mQuestion: in 2024, AI companies reached how many $$$ in 
value?\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mAnswer:\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n", + "\u001b[1;31m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭───────────────────────────────────────────────── RAG Response ──────────────────────────────────────────────────╮\n",
+       "│ AI companies reached $9T in value in 2024.                                                                      │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;32m╭─\u001b[0m\u001b[1;32m────────────────────────────────────────────────\u001b[0m RAG Response \u001b[1;32m─────────────────────────────────────────────────\u001b[0m\u001b[1;32m─╮\u001b[0m\n", + "\u001b[1;32m│\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mAI companies reached $9T in value in 2024.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m│\u001b[0m\n", + "\u001b[1;32m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def generate_chat_response(prompt: str, system_message: str = None):\n", + " \"\"\"\n", + " Basic Chat request to Azure OpenAI.\n", + " In production, consider passing more parameters (temperature, presence_penalty, etc.).\n", + " \"\"\"\n", + " messages = []\n", + " if system_message:\n", + " messages.append({\"role\": \"system\", \"content\": system_message})\n", + " messages.append({\"role\": \"user\", \"content\": prompt})\n", + "\n", + " completion = openai_client.chat.completions.create(\n", + " model=AZURE_OPENAI_CHAT_MODEL, messages=messages, temperature=0.7\n", + " )\n", + " return completion.choices[0].message.content\n", + "\n", + "\n", + "# Example question\n", + "user_query = (\n", + " \"in 2024, AI companies reached how many $$$ in value?\"\n", + ")\n", + "user_embed = embed_text(user_query)\n", + "\n", + "# We'll use integrated vectorization to generate query embeddings in Azure AI Search\n", + "vector_query = VectorizableTextQuery(\n", + " text=user_query, k_nearest_neighbors=5, fields=\"content_vector\"\n", + ")\n", + "\n", + "search_results = search_client.search(\n", + " search_text=user_query, vector_queries=[vector_query], select=[\"content\"], top=10\n", + ")\n", + "\n", + "retrieved_chunks = []\n", + "for result in search_results:\n", + " snippet = result[\"content\"]\n", + " retrieved_chunks.append(snippet)\n", 
+ "\n", + "# Combine retrieved chunks\n", + "context_str = \"\\n---\\n\".join(retrieved_chunks)\n", + "\n", + "rag_prompt = f\"\"\"\n", + "You are an AI assistant helping summarize the State of AI 2024 PDF.\n", + "Use ONLY the text below to answer the user's question. \n", + "If the answer isn't in the text, say you don't know.\n", + "Context:\n", + "{context_str}\n", + "\n", + "Question: {user_query}\n", + "Answer:\n", + "\"\"\"\n", + "\n", + "final_answer = generate_chat_response(rag_prompt)\n", + "\n", + "console.print(Panel(rag_prompt, title=\"RAG Prompt\", style=\"bold red\"))\n", + "console.print(Panel(final_answer, title=\"RAG Response\", style=\"bold green\"))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From acdb32d302d4d0b96da7ebab6c3e86ca42ea524b Mon Sep 17 00:00:00 2001 From: Farzad Date: Sat, 4 Jan 2025 17:49:18 -0600 Subject: [PATCH 2/9] azure ai search updates --- docs/examples/rag_azuresearch.ipynb | 284 ++++++++++++---------------- 1 file changed, 118 insertions(+), 166 deletions(-) diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb index 3f441088..ec0b8352 100644 --- a/docs/examples/rag_azuresearch.ipynb +++ b/docs/examples/rag_azuresearch.ipynb @@ -6,16 +6,19 @@ "id": "Ag9kcX2B_atc" }, "source": [ - "# RAG using Docling + Azure AI Search + Azure OpenAI\n", - "\n", - "This is a code recipe that uses [Azure AI Search](https://azure.microsoft.com/en-us/products/ai-services/ai-search/?msockid=0109678bea39665431e37323ebff6723) to perform 
RAG over PDF documents parsed by [Docling](https://ds4sd.github.io/docling/).\n", - "\n", - "# Description:\n", - "\n", - "1. Parse and chunk \"State of AI\" PPTX from Google Slides using Docling\n", - "2. Use Azure OpenAI embeddings for vector creation\n", - "3. Insert vector data into Azure AI Search\n", - "4. Perform a RAG query using Azure AI Search and Azure OpenAI\n" + "# Building a RAG System with Docling and Azure AI Search\n", + "\n", + "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using:\n", + "- [Docling](https://ds4sd.github.io/docling/) for document parsing and chunking\n", + "- [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search/?msockid=0109678bea39665431e37323ebff6723) for vector indexing and retrieval\n", + "- [Azure OpenAI](https://azure.microsoft.com/products/ai-services/openai-service?msockid=0109678bea39665431e37323ebff6723) for embeddings and chat completion\n", + "\n", + "This sample demonstrates how to:\n", + "1. Parse a PDF with Docling.\n", + "2. Chunk the parsed text.\n", + "3. Use Azure OpenAI for embeddings.\n", + "4. Index and search in Azure AI Search.\n", + "5. 
Run a retrieval-augmented generation (RAG) query with Azure OpenAI GPT-4o.\n" ] }, { @@ -26,7 +29,6 @@ "source": [ "# If running in a new environment, uncomment and run these:\n", "%pip install docling~=\"2.7.0\"\n", - "%pip install 'docling-core[chunking]'\n", "%pip install azure-search-documents==11.5.2\n", "%pip install azure-identity\n", "%pip install openai\n", @@ -39,18 +41,10 @@ "metadata": {}, "source": [ "# Part 0: Prerequisites\n", - "Before running this notebook, you'll need:\n", - "\n", - "1) Azure AI Search resource\n", - " - If using Role-based authentication, enable \"Managed Identities\" or \"both\" in the portal\n", - " - If using API keys, supply them in environment variables or secrets\n", - "\n", - "2) Azure OpenAI resource\n", - " - Deployed an Embeddings model (e.g., text-embedding-3-small)\n", - " - Deployed a Chat model (e.g., gpt-4o)\n", - "\n", - "3) Docling installed\n", - "4) Python 3.8+ environment with the packages listed above" + " - Azure AI Search resource\n", + " - Azure OpenAI resource with deployed embeddings & chat models\n", + " - Docling installed (Python 3.8+ environment)\n", + " - GPU or MPS recommended" ] }, { @@ -91,50 +85,11 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "import os\n", - "from azure.identity import DefaultAzureCredential\n", - "from azure.search.documents import SearchClient\n", - "from azure.search.documents.models import VectorizableTextQuery\n", - "import openai\n", - "from rich.console import Console\n", - "from rich.panel import Panel\n", - "import os\n", - "import os\n", - "\n", - "from azure.identity import DefaultAzureCredential\n", - "from azure.search.documents import SearchClient\n", - "from azure.search.documents.models import VectorizableTextQuery\n", - "from azure.search.documents.indexes import SearchIndexClient\n", - "from azure.search.documents.indexes.models import (\n", - " SearchIndex,\n", - " SearchField,\n", - 
" SearchFieldDataType,\n", - " SimpleField,\n", - " SearchableField,\n", - " VectorSearch,\n", - " HnswAlgorithmConfiguration,\n", - " VectorSearchProfile\n", - ")\n", - "from azure.search.documents.indexes.models import (\n", - " SearchField,\n", - " SearchFieldDataType,\n", - " VectorSearch,\n", - " HnswAlgorithmConfiguration,\n", - " VectorSearchProfile,\n", - " AzureOpenAIVectorizer,\n", - " AzureOpenAIVectorizerParameters,\n", - ")\n", - "from azure.core.credentials import AzureKeyCredential\n", - "\n", - "import openai\n", - "from rich.console import Console\n", - "from rich.panel import Panel\n", - "from azure.search.documents import SearchClient\n", - "\n", "from dotenv import load_dotenv\n", "\n", "load_dotenv()\n", @@ -146,56 +101,29 @@ "AZURE_OPENAI_API_KEY = os.getenv(\"AZURE_OPENAI_API_KEY\")\n", "AZURE_OPENAI_CHAT_MODEL = os.getenv(\"AZURE_OPENAI_CHAT_MODEL\")\n", "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\")\n", - "AZURE_OPENAI_EMBEDDINGS = os.getenv(\"AZURE_OPENAI_EMBEDDINGS\")\n", - "\n", - "# # Provide environment variables or paste in your values:\n", - "# AZURE_SEARCH_ENDPOINT = (\n", - "# os.getenv(\"AZURE_SEARCH_ENDPOINT\") or \"PUT_AZURE_SEARCH_ENDPOINT_HERE\"\n", - "# )\n", - "# AZURE_SEARCH_INDEX_NAME = os.getenv(\"AZURE_SEARCH_INDEX_NAME\") or \"docling-rag-sample\"\n", - "# AZURE_SEARCH_ADMIN_KEY = (\n", - "# os.getenv(\"AZURE_SEARCH_KEY\") or \"YOUR_SEARCH_ADMIN_KEY_OR_DELETE_IF_RBAC\"\n", - "# )\n", - "# AZURE_OPENAI_ENDPOINT = (\n", - "# os.getenv(\"AZURE_OPENAI_ENDPOINT\") or \"PUT_AZURE_OPENAI_ENDPOINT_HERE\"\n", - "# )\n", - "# AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\") or \"2024-06-01\"\n", - "# AZURE_OPENAI_EMBEDDINGS = (\n", - "# os.getenv(\"AZURE_OPENAI_EMBEDDINGS\") or \"text-embedding-3-large\"\n", - "# )\n", - "# AZURE_OPENAI_CHAT_MODEL = os.getenv(\"AZURE_OPENAI_CHAT_MODEL\") or \"gpt-4o\"\n", - "\n", - "# # If using Key-based auth for Azure OpenAI\n", - "# AZURE_OPENAI_KEY = 
os.getenv(\"AZURE_OPENAI_KEY\") or \"YOUR_OPENAI_KEY_HERE\"\n", - "\n", - "# # If using Role-based auth for Azure OpenAI, comment out openai.api_key below\n", - "# openai.api_key = AZURE_OPENAI_KEY\n", - "console = Console()" + "AZURE_OPENAI_EMBEDDINGS = os.getenv(\"AZURE_OPENAI_EMBEDDINGS\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Part 2: Parse with Docling \n", - "We'll parse the \"State of AI\" slides from a remote link. Feel free to use whatever document or source you want. \n", - "\n", - "Note: In real use, you might prefer doc_converter.convert_all() or a single convert() call.\n", - "We'll show a simple approach here.\n", + "# Part 2: Parse the PDF with Docling\n", + "Example: \"State of AI\" slides from a remote link.\n", "\n", - "On a A100 GPU, it took ~4 mins. Azure SKU: \"Standard_NC24ads_A100_v4 (24 cores, 220 GB RAM, 64 GB disk)\"" + "You can find the raw powerpoint here: https://docs.google.com/presentation/d/1GmZmoWOa2O92BPrncRcTKa15xvQGhq7g4I4hJSNlC0M/edit?usp=sharing" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 97541.95it/s]\n" + "Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 109734.70it/s]\n" ] }, { @@ -251,28 +179,32 @@ } ], "source": [ + "from rich.console import Console\n", + "from rich.panel import Panel\n", "from docling.document_converter import DocumentConverter\n", "\n", - "source_url = \"https://ignite2024demo.blob.core.windows.net/state-of-ai-2024/State of AI Report 2024.pdf\"\n", + "console = Console()\n", "\n", + "source_url = \"https://ignite2024demo.blob.core.windows.net/state-of-ai-2024/State of AI Report 2024.pdf\"\n", "converter = DocumentConverter()\n", "result = converter.convert(source_url)\n", "\n", - "# We'll just display the Markdown output to confirm parse success:\n", + "# Optional: preview the parsed Markdown\n", 
"md_preview = result.document.export_to_markdown()\n", - "console.print(Panel(md_preview[:500] + \"...\", title=\"Docling Markdown Preview\"))\n" + "console.print(Panel(md_preview[:500] + \"...\", title=\"Docling Markdown Preview\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Part 3: Hierarchical Chunking of the parsed text " + "# Part 3: Hierarchical Chunking\n", + " Convert the Document into smaller chunks for embedding & indexing" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -293,15 +225,11 @@ "from docling_core.transforms.chunker import HierarchicalChunker\n", "\n", "chunker = HierarchicalChunker()\n", - "\n", - "# We'll chunk the single result.document from above\n", "doc_chunks = list(chunker.chunk(result.document))\n", "\n", - "# For each chunk, create a simple \"content\" text. \n", - "# Optionally you can prefix with a doc/page title if relevant.\n", "all_chunks = []\n", "for idx, c in enumerate(doc_chunks):\n", - " chunk_text = c.text \n", + " chunk_text = c.text\n", " all_chunks.append((f\"chunk_{idx}\", chunk_text))\n", "\n", "console.print(f\"Total chunks from PDF: {len(all_chunks)}\")" @@ -311,24 +239,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Part 4: Create an Azure AI Search index and push chunk embeddings \n", - "We'll embed each chunk using Azure OpenAI, then upsert to a custom index\n", - "that has:\n", - "- a primary key: chunk_id\n", - "- a text field: content\n", - "- a vector field: content_vector (dimension 1536, if using text-embedding-3-small)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create the search index" + "# Part 4: Create Azure Search index and push chunk embeddings\n", + "We'll define a vector index and store chunk embeddings in Azure AI Search." 
] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -346,12 +263,25 @@ } ], "source": [ - "VECTOR_DIM = 1536 # Adjust as needed for your embedding model\n", - "\n", - "index_client = SearchIndexClient(\n", - " AZURE_SEARCH_ENDPOINT, AzureKeyCredential(AZURE_SEARCH_KEY)\n", + "from azure.identity import DefaultAzureCredential\n", + "from azure.search.documents.indexes import SearchIndexClient\n", + "from azure.search.documents.indexes.models import (\n", + " SearchIndex,\n", + " SearchField,\n", + " SearchFieldDataType,\n", + " SimpleField,\n", + " SearchableField,\n", + " VectorSearch,\n", + " HnswAlgorithmConfiguration,\n", + " VectorSearchProfile,\n", + " AzureOpenAIVectorizer,\n", + " AzureOpenAIVectorizerParameters,\n", ")\n", + "from azure.core.credentials import AzureKeyCredential\n", + "\n", + "VECTOR_DIM = 1536 # Adjust based on your chosen embeddings model\n", "\n", + "index_client = SearchIndexClient(AZURE_SEARCH_ENDPOINT, AzureKeyCredential(AZURE_SEARCH_KEY))\n", "\n", "def create_search_index(index_name: str):\n", " fields = [\n", @@ -391,7 +321,11 @@ " ],\n", " )\n", "\n", - " new_index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)\n", + " new_index = SearchIndex(\n", + " name=index_name,\n", + " fields=fields,\n", + " vector_search=vector_search\n", + " )\n", "\n", " try:\n", " index_client.delete_index(index_name)\n", @@ -401,20 +335,19 @@ " index_client.create_or_update_index(new_index)\n", " console.print(f\"Index '{index_name}' created.\")\n", "\n", - "\n", - "create_search_index(AZURE_SEARCH_INDEX_NAME)" + "create_search_index(AZURE_SEARCH_INDEX_NAME)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## B) Generate embeddings & upsert" + "Embed chunks and upsert them into Azure AI Search" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -485,27 +418,23 @@ ], "source": [ "from 
openai import AzureOpenAI\n", + "from azure.search.documents import SearchClient\n", + "import uuid\n", "\n", - "search_client = SearchClient(\n", - " AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_INDEX_NAME, AzureKeyCredential(AZURE_SEARCH_KEY)\n", - ")\n", + "search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_INDEX_NAME, AzureKeyCredential(AZURE_SEARCH_KEY))\n", "openai_client = AzureOpenAI(\n", " api_key=AZURE_OPENAI_API_KEY,\n", " api_version=AZURE_OPENAI_API_VERSION,\n", " azure_endpoint=AZURE_OPENAI_ENDPOINT,\n", ")\n", "\n", - "\n", "def embed_text(text: str):\n", - " # Basic call to Azure OpenAI Embeddings\n", " response = openai_client.embeddings.create(\n", - " input=text, model=AZURE_OPENAI_EMBEDDINGS # or deployment name\n", + " input=text,\n", + " model=AZURE_OPENAI_EMBEDDINGS\n", " )\n", " return response.data[0].embedding\n", "\n", - "\n", - "import uuid\n", - "\n", "upload_docs = []\n", "for chunk_id, chunk_text in all_chunks:\n", " embedding_vector = embed_text(chunk_text)\n", @@ -517,28 +446,28 @@ " }\n", " )\n", "\n", - "# Upload in small batches\n", "BATCH_SIZE = 250\n", "for i in range(0, len(upload_docs), BATCH_SIZE):\n", " subset = upload_docs[i : i + BATCH_SIZE]\n", " resp = search_client.upload_documents(documents=subset)\n", " console.print(\n", - " f\"Uploaded batch {i} -> {i+len(subset)}; success: {resp[0].succeeded}, status code: {resp[0].status_code}\"\n", - ")\n", + " f\"Uploaded batch {i} -> {i+len(subset)}; success: {resp[0].succeeded}, status code: {resp[0].status_code}\"\n", + " )\n", "\n", - "console.print(\"All chunks uploaded to Azure Search.\")" + "console.print(\"All chunks uploaded to Azure Search.\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Part 5: RAG Query with Azure OpenAI" + "# Part 5: RAG Query with Azure OpenAI\n", + "Combine retrieval from Azure Search with Chat Completions (aka. 
grounding your LLM)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -546,9 +475,10 @@ "text/html": [ "
╭────────────────────────────────────────────────── RAG Prompt ───────────────────────────────────────────────────╮\n",
        "│                                                                                                                 │\n",
-       "│ You are an AI assistant helping summarize the State of AI 2024 PDF.                                             │\n",
+       "│ You are an AI assistant helping answering questions about the State of AI 2024 Report.                          │\n",
        "│ Use ONLY the text below to answer the user's question.                                                          │\n",
        "│ If the answer isn't in the text, say you don't know.                                                            │\n",
+       "│                                                                                                                 │\n",
        "│ Context:                                                                                                        │\n",
        "│ -NVIDIA remains the most powerful company in the world, enjoying a stint in the $3T club, while regulators      │\n",
        "│ probe the concentrations of power within GenAI.                                                                 │\n",
@@ -584,7 +514,18 @@
        "│ Strikingly, the average AI company that has reached $30M+ annualised revenue took just 20 months to get there,  │\n",
        "│ compared to 65 months for equally promising SaaS companies.                                                     │\n",
        "│ ---                                                                                                             │\n",
-       "│ A generative AI media company is investigated for its misuse during in the 2024 US election circuit.            │\n",
+       "│ In last year's report, we covered how the culture wars appeared to be slowly coming for AI, with the Gemini     │\n",
+       "│ 'woke AI' blow up fuelling the fires. Could the US presidential election signal a change in direction?          │\n",
+       "│ ● The 2024 Republican platform commits to repealing the AI executive order (EO), claiming it \"hinders AI        │\n",
+       "│ Innovation, and imposes Radical Leftwing ideas on the development of this technology\", attracting the support   │\n",
+       "│ of some big names in the Valley. It, however, makes no mention of the future of the US AISI.                    │\n",
+       "│ ● JD Vance is the first member of a presidential ticket to have apparently developed views on these issues,     │\n",
+       "│ having previously accused big tech companies of using AI safety as a vehicle for regulatory capture.            │\n",
+       "│ ● Meanwhile, Kamala Harris has said less on the subject. However, her remarks when she visited the UK for the   │\n",
+       "│ Bletchley Summit were widely interpreted as an implicit critique of the focus on safety questions at the        │\n",
+       "│ expense of ethics, echoing many UK civil society groups.                                                        │\n",
+       "│ ● Regardless of the fate of the EO, at a Congressional level, safety remains a bipartisan issue, with both      │\n",
+       "│ parties signing up to an AI policy roadmap in May.                                                              │\n",
        "│                                                                                                                 │\n",
        "│ Question: in 2024, AI companies reached how many $$$ in value?                                                  │\n",
        "│ Answer:                                                                                                         │\n",
@@ -595,9 +536,10 @@
       "text/plain": [
        "\u001b[1;31m╭─\u001b[0m\u001b[1;31m─────────────────────────────────────────────────\u001b[0m RAG Prompt \u001b[1;31m──────────────────────────────────────────────────\u001b[0m\u001b[1;31m─╮\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m                                                                                                               \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
-       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mYou are an AI assistant helping summarize the State of AI 2024 PDF.\u001b[0m\u001b[1;31m                                            \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
-       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mUse ONLY the text below to answer the user's question. \u001b[0m\u001b[1;31m                                                        \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mYou are an AI assistant helping answering questions about the State of AI 2024 Report.\u001b[0m\u001b[1;31m                         \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mUse ONLY the text below to answer the user's question.\u001b[0m\u001b[1;31m                                                         \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mIf the answer isn't in the text, say you don't know.\u001b[0m\u001b[1;31m                                                           \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m                                                                                                               \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mContext:\u001b[0m\u001b[1;31m                                                                                                       \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m-NVIDIA remains the most powerful company in the world, enjoying a stint in the $3T club, while regulators \u001b[0m\u001b[1;31m    \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mprobe the concentrations of power within GenAI.\u001b[0m\u001b[1;31m                                                                \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
@@ -633,7 +575,18 @@
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mStrikingly, the average AI company that has reached $30M+ annualised revenue took just 20 months to get there, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcompared to 65 months for equally promising SaaS companies.\u001b[0m\u001b[1;31m                                                    \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m                                                                                                            \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
-       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mA generative AI media company is investigated for its misuse during in the 2024 US election circuit.\u001b[0m\u001b[1;31m           \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mIn last year's report, we covered how the culture wars appeared to be slowly coming for AI, with the Gemini \u001b[0m\u001b[1;31m   \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m'woke AI' blow up fuelling the fires. Could the US presidential election signal a change in direction?\u001b[0m\u001b[1;31m         \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m● The 2024 Republican platform commits to repealing the AI executive order (EO), claiming it \"hinders AI \u001b[0m\u001b[1;31m      \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mInnovation, and imposes Radical Leftwing ideas on the development of this technology\", attracting the support \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mof some big names in the Valley. It, however, makes no mention of the future of the US AISI.\u001b[0m\u001b[1;31m                   \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m● JD Vance is the first member of a presidential ticket to have apparently developed views on these issues, \u001b[0m\u001b[1;31m   \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mhaving previously accused big tech companies of using AI safety as a vehicle for regulatory capture.\u001b[0m\u001b[1;31m           \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m● Meanwhile, Kamala Harris has said less on the subject. However, her remarks when she visited the UK for the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mBletchley Summit were widely interpreted as an implicit critique of the focus on safety questions at the \u001b[0m\u001b[1;31m      \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mexpense of ethics, echoing many UK civil society groups.\u001b[0m\u001b[1;31m                                                       \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m● Regardless of the fate of the EO, at a Congressional level, safety remains a bipartisan issue, with both \u001b[0m\u001b[1;31m    \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
+       "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mparties signing up to an AI policy roadmap in May.\u001b[0m\u001b[1;31m                                                             \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m                                                                                                               \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mQuestion: in 2024, AI companies reached how many $$$ in value?\u001b[0m\u001b[1;31m                                                 \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
        "\u001b[1;31m│\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mAnswer:\u001b[0m\u001b[1;31m                                                                                                        \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m│\u001b[0m\n",
@@ -663,35 +616,35 @@
     }
    ],
    "source": [
+    "from azure.search.documents.models import VectorizableTextQuery\n",
+    "\n",
     "def generate_chat_response(prompt: str, system_message: str = None):\n",
-    "    \"\"\"\n",
-    "    Basic Chat request to Azure OpenAI.\n",
-    "    In production, consider passing more parameters (temperature, presence_penalty, etc.).\n",
-    "    \"\"\"\n",
     "    messages = []\n",
     "    if system_message:\n",
     "        messages.append({\"role\": \"system\", \"content\": system_message})\n",
     "    messages.append({\"role\": \"user\", \"content\": prompt})\n",
     "\n",
     "    completion = openai_client.chat.completions.create(\n",
-    "        model=AZURE_OPENAI_CHAT_MODEL, messages=messages, temperature=0.7\n",
+    "        model=AZURE_OPENAI_CHAT_MODEL,\n",
+    "        messages=messages,\n",
+    "        temperature=0.7\n",
     "    )\n",
     "    return completion.choices[0].message.content\n",
     "\n",
-    "\n",
-    "# Example question\n",
-    "user_query = (\n",
-    "    \"in 2024, AI companies reached how many $$$ in value?\"\n",
-    ")\n",
+    "user_query = \"in 2024, AI companies reached how many $$$ in value?\"\n",
     "user_embed = embed_text(user_query)\n",
     "\n",
-    "# We'll use integrated vectorization to generate query embeddings in Azure AI Search\n",
     "vector_query = VectorizableTextQuery(\n",
-    "    text=user_query, k_nearest_neighbors=5, fields=\"content_vector\"\n",
+    "    text=user_query, # passing in text for a hybrid search\n",
+    "    k_nearest_neighbors=5,\n",
+    "    fields=\"content_vector\"\n",
     ")\n",
     "\n",
     "search_results = search_client.search(\n",
-    "    search_text=user_query, vector_queries=[vector_query], select=[\"content\"], top=10\n",
+    "    search_text=user_query,\n",
+    "    vector_queries=[vector_query],\n",
+    "    select=[\"content\"],\n",
+    "    top=10\n",
     ")\n",
     "\n",
     "retrieved_chunks = []\n",
@@ -699,13 +652,12 @@
     "    snippet = result[\"content\"]\n",
     "    retrieved_chunks.append(snippet)\n",
     "\n",
-    "# Combine retrieved chunks\n",
     "context_str = \"\\n---\\n\".join(retrieved_chunks)\n",
-    "\n",
     "rag_prompt = f\"\"\"\n",
-    "You are an AI assistant helping summarize the State of AI 2024 PDF.\n",
-    "Use ONLY the text below to answer the user's question. \n",
+    "You are an AI assistant helping answer questions about the State of AI 2024 Report.\n",
+    "Use ONLY the text below to answer the user's question.\n",
     "If the answer isn't in the text, say you don't know.\n",
+    "\n",
     "Context:\n",
     "{context_str}\n",
     "\n",

From df6201a1c080eefd2de60b8104a9cc188459a9e5 Mon Sep 17 00:00:00 2001
From: Farzad 
Date: Sat, 4 Jan 2025 17:52:07 -0600
Subject: [PATCH 3/9] mkdocs

---
 mkdocs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mkdocs.yml b/mkdocs.yml
index 0428693c..ca682926 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -79,6 +79,7 @@ nav:
     - Chunking:
       - "Hybrid chunking": examples/hybrid_chunking.ipynb
     - RAG / QA:
+      - "RAG with Azure AI Search": examples/rag_azuresearch.ipynb
       - "RAG with Haystack": examples/rag_haystack.ipynb
       - "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb
       - "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb

From 86c1fd2dae8644fbcbf456e26d5878c413e594eb Mon Sep 17 00:00:00 2001
From: Farzad 
Date: Sat, 4 Jan 2025 17:59:58 -0600
Subject: [PATCH 4/9] colab check

---
 docs/examples/rag_azuresearch.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb
index ec0b8352..51af90f2 100644
--- a/docs/examples/rag_azuresearch.ipynb
+++ b/docs/examples/rag_azuresearch.ipynb
@@ -7,6 +7,7 @@
    },
    "source": [
     "# Building a RAG System with Docling and Azure AI Search\n",
+    "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farzad528/docling/blob/tree/main/docs/examples/rag_azuresearch.ipynb)\n",
     "\n",
     "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using:\n",
     "- [Docling](https://ds4sd.github.io/docling/) for document parsing and chunking\n",

From e582b887e278e5ef32e25e7f2133cf297b8b8ae8 Mon Sep 17 00:00:00 2001
From: Farzad 
Date: Sat, 4 Jan 2025 18:06:21 -0600
Subject: [PATCH 5/9] colab link fix

---
 docs/examples/rag_azuresearch.ipynb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb
index 51af90f2..ebd1cebf 100644
--- a/docs/examples/rag_azuresearch.ipynb
+++ b/docs/examples/rag_azuresearch.ipynb
@@ -7,7 +7,8 @@
    },
    "source": [
     "# Building a RAG System with Docling and Azure AI Search\n",
-    "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farzad528/docling/blob/tree/main/docs/examples/rag_azuresearch.ipynb)\n",
+    "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farzad528/docling/blob/main/docs/examples/rag_azuresearch.ipynb)\n",
+    "\n",
     "\n",
     "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using:\n",
     "- [Docling](https://ds4sd.github.io/docling/) for document parsing and chunking\n",

From 674539c8fed19570e20398ff443d4ecbf63737ee Mon Sep 17 00:00:00 2001
From: Farzad 
Date: Fri, 10 Jan 2025 16:36:16 -0600
Subject: [PATCH 6/9] pr comments

---
 docs/examples/rag_azuresearch.ipynb | 80 +++++++++++++++++++----------
 1 file changed, 54 insertions(+), 26 deletions(-)

diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb
index ebd1cebf..cc4a9614 100644
--- a/docs/examples/rag_azuresearch.ipynb
+++ b/docs/examples/rag_azuresearch.ipynb
@@ -44,9 +44,10 @@
    "source": [
     "# Part 0: Prerequisites\n",
     " - Azure AI Search resource\n",
-    " - Azure OpenAI resource with deployed embeddings & chat models\n",
+    " - Azure OpenAI resource with a deployed embedding & chat completion model\n",
     " - Docling installed (Python 3.8+ environment)\n",
-    " - GPU or MPS recommended"
+    "\n",
+    "GPU or MPS usage can speed up Docling’s parsing (especially for large PDFs or when OCR/table extraction is needed). However, if no GPU is detected, you can comment out the following checks and proceed with CPU, albeit with slower performance."
    ]
   },
   {
@@ -65,7 +66,6 @@
    "source": [
     "import torch\n",
     "\n",
-    "# Check if GPU or MPS is available\n",
     "if torch.cuda.is_available():\n",
     "    device = torch.device(\"cuda\")\n",
     "    print(f\"CUDA GPU is enabled: {torch.cuda.get_device_name(0)}\")\n",
@@ -73,8 +73,10 @@
     "    device = torch.device(\"mps\")\n",
     "    print(\"MPS GPU is enabled.\")\n",
     "else:\n",
+    "    # Comment out the error if you'd like to allow CPU fallback\n",
+    "    # But be aware parsing could be slower\n",
     "    raise EnvironmentError(\n",
-    "        \"No GPU or MPS device found. Please check your environment and ensure GPU or MPS support is configured.\"\n",
+    "        \"No GPU or MPS device found. Proceed with CPU only if you understand the performance implications.\"\n",
     "    )"
    ]
   },
@@ -100,10 +102,10 @@
     "AZURE_SEARCH_KEY = os.getenv(\"AZURE_SEARCH_KEY\")\n",
     "AZURE_SEARCH_INDEX_NAME = os.getenv(\"AZURE_SEARCH_INDEX_NAME\")\n",
     "AZURE_OPENAI_ENDPOINT = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
-    "AZURE_OPENAI_API_KEY = os.getenv(\"AZURE_OPENAI_API_KEY\")\n",
-    "AZURE_OPENAI_CHAT_MODEL = os.getenv(\"AZURE_OPENAI_CHAT_MODEL\")\n",
-    "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
-    "AZURE_OPENAI_EMBEDDINGS = os.getenv(\"AZURE_OPENAI_EMBEDDINGS\")"
+    "AZURE_OPENAI_API_KEY = os.getenv(\"AZURE_OPENAI_API_KEY\") # Ensure this is your Admin Key\n",
+    "AZURE_OPENAI_CHAT_MODEL = os.getenv(\"AZURE_OPENAI_CHAT_MODEL\") # Using a deployed model named \"gpt-4o\"\n",
+    "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-10-21\")\n",
+    "AZURE_OPENAI_EMBEDDINGS = os.getenv(\"AZURE_OPENAI_EMBEDDINGS\") # Using a deployed model named \"text-embedding-3-small\""
    ]
   },
   {
@@ -206,7 +208,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -316,13 +318,14 @@
     "                parameters=AzureOpenAIVectorizerParameters(\n",
     "                    resource_url=AZURE_OPENAI_ENDPOINT,\n",
     "                    deployment_name=AZURE_OPENAI_EMBEDDINGS,\n",
-    "                    model_name=\"text-embedding-3-small\",\n",
+    "                    model_name=\"text-embedding-3-small\", # same as the environment variable \n",
     "                    api_key=AZURE_OPENAI_API_KEY,\n",
     "                ),\n",
     "            )\n",
     "        ],\n",
     "    )\n",
-    "\n",
+    "    \n",
+    "    # Define the new index with the fields and vector search config above\n",
     "    new_index = SearchIndex(\n",
     "        name=index_name,\n",
     "        fields=fields,\n",
@@ -349,17 +352,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
-       "
Uploaded batch 0 -> 250; success: True, status code: 201\n",
+       "
Uploaded batch 0 -> 250; all_succeeded: True, first_doc_status_code: 201\n",
        "
\n" ], "text/plain": [ - "Uploaded batch \u001b[1;36m0\u001b[0m -> \u001b[1;36m250\u001b[0m; success: \u001b[3;92mTrue\u001b[0m, status code: \u001b[1;36m201\u001b[0m\n" + "Uploaded batch \u001b[1;36m0\u001b[0m -> \u001b[1;36m250\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" ] }, "metadata": {}, @@ -368,11 +371,11 @@ { "data": { "text/html": [ - "
Uploaded batch 250 -> 500; success: True, status code: 201\n",
+       "
Uploaded batch 250 -> 500; all_succeeded: True, first_doc_status_code: 201\n",
        "
\n" ], "text/plain": [ - "Uploaded batch \u001b[1;36m250\u001b[0m -> \u001b[1;36m500\u001b[0m; success: \u001b[3;92mTrue\u001b[0m, status code: \u001b[1;36m201\u001b[0m\n" + "Uploaded batch \u001b[1;36m250\u001b[0m -> \u001b[1;36m500\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" ] }, "metadata": {}, @@ -381,11 +384,11 @@ { "data": { "text/html": [ - "
Uploaded batch 500 -> 750; success: True, status code: 201\n",
+       "
Uploaded batch 500 -> 750; all_succeeded: True, first_doc_status_code: 201\n",
        "
\n" ], "text/plain": [ - "Uploaded batch \u001b[1;36m500\u001b[0m -> \u001b[1;36m750\u001b[0m; success: \u001b[3;92mTrue\u001b[0m, status code: \u001b[1;36m201\u001b[0m\n" + "Uploaded batch \u001b[1;36m500\u001b[0m -> \u001b[1;36m750\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" ] }, "metadata": {}, @@ -394,11 +397,11 @@ { "data": { "text/html": [ - "
Uploaded batch 750 -> 966; success: True, status code: 201\n",
+       "
Uploaded batch 750 -> 966; all_succeeded: True, first_doc_status_code: 201\n",
        "
\n" ], "text/plain": [ - "Uploaded batch \u001b[1;36m750\u001b[0m -> \u001b[1;36m966\u001b[0m; success: \u001b[3;92mTrue\u001b[0m, status code: \u001b[1;36m201\u001b[0m\n" + "Uploaded batch \u001b[1;36m750\u001b[0m -> \u001b[1;36m966\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" ] }, "metadata": {}, @@ -431,6 +434,9 @@ ")\n", "\n", "def embed_text(text: str):\n", + " \"\"\"\n", + " Helper to generate embeddings with Azure OpenAI.\n", + " \"\"\"\n", " response = openai_client.embeddings.create(\n", " input=text,\n", " model=AZURE_OPENAI_EMBEDDINGS\n", @@ -438,22 +444,26 @@ " return response.data[0].embedding\n", "\n", "upload_docs = []\n", - "for chunk_id, chunk_text in all_chunks:\n", + "for (chunk_id, chunk_text) in all_chunks:\n", " embedding_vector = embed_text(chunk_text)\n", " upload_docs.append(\n", " {\n", - " \"chunk_id\": str(uuid.uuid4()),\n", + " \"chunk_id\": chunk_id,\n", " \"content\": chunk_text,\n", " \"content_vector\": embedding_vector,\n", " }\n", " )\n", "\n", + "\n", "BATCH_SIZE = 250\n", "for i in range(0, len(upload_docs), BATCH_SIZE):\n", " subset = upload_docs[i : i + BATCH_SIZE]\n", " resp = search_client.upload_documents(documents=subset)\n", + "\n", + " all_succeeded = all(r.succeeded for r in resp)\n", " console.print(\n", - " f\"Uploaded batch {i} -> {i+len(subset)}; success: {resp[0].succeeded}, status code: {resp[0].status_code}\"\n", + " f\"Uploaded batch {i} -> {i+len(subset)}; all_succeeded: {all_succeeded}, \"\n", + " f\"first_doc_status_code: {resp[0].status_code}\"\n", " )\n", "\n", "console.print(\"All chunks uploaded to Azure Search.\")\n" @@ -621,6 +631,11 @@ "from azure.search.documents.models import VectorizableTextQuery\n", "\n", "def generate_chat_response(prompt: str, system_message: str = None):\n", + " \"\"\"\n", + " Generates a single-turn chat response using Azure OpenAI Chat.\n", + " If you need multi-turn conversation or follow-up queries, you'll have to\n", 
+ " maintain the messages list externally.\n", + " \"\"\"\n", " messages = []\n", " if system_message:\n", " messages.append({\"role\": \"system\", \"content\": system_message})\n", @@ -672,6 +687,19 @@ "console.print(Panel(rag_prompt, title=\"RAG Prompt\", style=\"bold red\"))\n", "console.print(Panel(final_answer, title=\"RAG Response\", style=\"bold green\"))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset Citation\n", + "\n", + "**State of AI Report 2024** \n", + "Benaich, N. & Air Street Capital. (2024). *State of AI Report 2024*. \n", + "Licensed under [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). \n", + "Available at: [STATE OF AI REPORT 2024](https://www.stateof.ai/)\n", + "\n" + ] } ], "metadata": { @@ -681,9 +709,9 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3.10 - SDK v2", + "display_name": "Python 3", "language": "python", - "name": "python310-sdkv2" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -695,7 +723,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.11.9" } }, "nbformat": 4, From 43f87971ec4e2d2097aea9f2390451e16363a254 Mon Sep 17 00:00:00 2001 From: Farzad Date: Fri, 10 Jan 2025 16:45:18 -0600 Subject: [PATCH 7/9] title change --- docs/examples/rag_azuresearch.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb index cc4a9614..841fb6fa 100644 --- a/docs/examples/rag_azuresearch.ipynb +++ b/docs/examples/rag_azuresearch.ipynb @@ -6,7 +6,7 @@ "id": "Ag9kcX2B_atc" }, "source": [ - "# Building a RAG System with Docling and Azure AI Search\n", + "# RAG with Azure AI Search\n", "[![Open in 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farzad528/docling/blob/main/docs/examples/rag_azuresearch.ipynb)\n", "\n", "\n", From 64b608f13f79c1b8b3334ea90ed18d1b28436903 Mon Sep 17 00:00:00 2001 From: Farzad Date: Fri, 10 Jan 2025 16:47:49 -0600 Subject: [PATCH 8/9] table --- docs/examples/rag_azuresearch.ipynb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb index 841fb6fa..f2a760c7 100644 --- a/docs/examples/rag_azuresearch.ipynb +++ b/docs/examples/rag_azuresearch.ipynb @@ -9,6 +9,12 @@ "# RAG with Azure AI Search\n", "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/farzad528/docling/blob/main/docs/examples/rag_azuresearch.ipynb)\n", "\n", + "| Step | Tech | Execution |\n", + "| ------------------ | ------------------ | --------- |\n", + "| Embedding | Azure OpenAI | 🌐 Remote |\n", + "| Vector Store | Azure AI Search | 🌐 Remote |\n", + "| Gen AI | Azure OpenAI GPT-4o | 🌐 Remote |\n", + "\n", "\n", "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using:\n", "- [Docling](https://ds4sd.github.io/docling/) for document parsing and chunking\n", From c54ee398d16e2754d258e3ce2455f456999219f9 Mon Sep 17 00:00:00 2001 From: Farzad Date: Fri, 10 Jan 2025 16:53:24 -0600 Subject: [PATCH 9/9] rename to Azure OpenAI --- docs/examples/rag_azuresearch.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb index f2a760c7..f4408d9d 100644 --- a/docs/examples/rag_azuresearch.ipynb +++ b/docs/examples/rag_azuresearch.ipynb @@ -13,7 +13,7 @@ "| ------------------ | ------------------ | --------- |\n", "| Embedding | Azure OpenAI | 🌐 Remote |\n", "| Vector Store | Azure AI Search | 🌐 Remote |\n", - "| Gen AI | Azure OpenAI GPT-4o | 🌐 Remote |\n", 
+ "| Gen AI | Azure OpenAI | 🌐 Remote |\n", "\n", "\n", "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using:\n",