From f093e9d9a0791743d59744261df4aefd591bae36 Mon Sep 17 00:00:00 2001 From: Li Yin Date: Sat, 14 Dec 2024 14:44:06 -0800 Subject: [PATCH] add change log --- adalflow/CHANGELOG.md | 8 +- .../components/model_client/bedrock_client.py | 4 + notebooks/adalflow_colab_template.ipynb | 4 +- notebooks/evaluation/adalflow_llm_eval.ipynb | 14 +- ...lflow_object_count_auto_optimization.ipynb | 143 +- ...adalflow_classification_optimization.ipynb | 912 ++++---- notebooks/tutorials/adalflow_component.ipynb | 1869 +++++++++-------- .../tutorials/adalflow_dataclasses.ipynb | 152 +- .../tutorials/adalflow_function_calls.ipynb | 1407 +++++++------ notebooks/tutorials/adalflow_logger.ipynb | 466 ++-- .../tutorials/adalflow_rag_optimization.ipynb | 969 ++++----- .../tutorials/adalflow_rag_playbook.ipynb | 1014 ++++----- .../tutorials/adalflow_text_splitter.ipynb | 24 +- notebooks/tutorials/adalflow_tracing.ipynb | 355 ++-- tutorials/database.ipynb | 128 +- tutorials/dataclass.ipynb | 49 +- tutorials/embedder.ipynb | 7 +- tutorials/generator.ipynb | 10 +- tutorials/model_client.ipynb | 19 +- tutorials/rag.ipynb | 13 +- tutorials/react_note.ipynb | 6 +- tutorials/retriever.ipynb | 227 +- tutorials/tools.ipynb | 491 +++-- use_cases/agent/react_agent.ipynb | 140 +- use_cases/generator/intermediate.ipynb | 18 +- use_cases/question_answering/chatbot.ipynb | 15 +- use_cases/question_answering/simple_qa.ipynb | 11 +- 27 files changed, 4359 insertions(+), 4116 deletions(-) diff --git a/adalflow/CHANGELOG.md b/adalflow/CHANGELOG.md index e5b806f8..f87773ab 100644 --- a/adalflow/CHANGELOG.md +++ b/adalflow/CHANGELOG.md @@ -1,4 +1,10 @@ - +## [0.2.7] - TO Be Released +### Added +- `Memory` is completed with `call` and `add_dialog_turn` methods. +- Integrated `LanceDB` in the `Retriever` +### Improved +- `BedrockAPIClient` added more details on setup, yet it is still in experimental stage. +- `AzureAPIClient` added more details on setup, yet it is still in experimental stage. 
## [0.2.6] - 2024-11-25 ### Improved - Add default `max_tokens=512` to the `AnthropicAPIClient` to avoid the error when the user does not provide the `max_tokens` in the prompt. diff --git a/adalflow/adalflow/components/model_client/bedrock_client.py b/adalflow/adalflow/components/model_client/bedrock_client.py index d25b48bc..b10098bb 100644 --- a/adalflow/adalflow/components/model_client/bedrock_client.py +++ b/adalflow/adalflow/components/model_client/bedrock_client.py @@ -39,6 +39,10 @@ def get_first_message_content(completion: Dict) -> str: class BedrockAPIClient(ModelClient): __doc__ = r"""A component wrapper for the Bedrock API client. + Note: + + This api is in experimental and is not fully tested and validated yet. + Support: 1. AWS Titan 2. Claude diff --git a/notebooks/adalflow_colab_template.ipynb b/notebooks/adalflow_colab_template.ipynb index 191bbf08..39715816 100644 --- a/notebooks/adalflow_colab_template.ipynb +++ b/notebooks/adalflow_colab_template.ipynb @@ -87,8 +87,8 @@ "\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", "\n", "print(\"API keys have been set.\")" ] diff --git a/notebooks/evaluation/adalflow_llm_eval.ipynb b/notebooks/evaluation/adalflow_llm_eval.ipynb index 5e903978..448215e3 100644 --- a/notebooks/evaluation/adalflow_llm_eval.ipynb +++ b/notebooks/evaluation/adalflow_llm_eval.ipynb @@ -95,7 +95,7 @@ "\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -301,11 +301,11 @@ "source": [ "# without questions, and with customized judgement query\n", "\n", + "\n", "def compute_llm_as_judge_wo_questions():\n", " from adalflow.eval.llm_as_judge import LLMasJudge, DefaultLLMJudge\n", " from 
adalflow.components.model_client import OpenAIClient\n", "\n", - "\n", " llm_judge = DefaultLLMJudge(\n", " model_client=OpenAIClient(),\n", " model_kwargs={\n", @@ -413,7 +413,7 @@ " eval_rslt = llm_evaluator.compute(\n", " questions=questions, gt_answers=gt_answers, pred_answers=pred_answers\n", " )\n", - " print(eval_rslt)\n" + " print(eval_rslt)" ] }, { @@ -569,8 +569,12 @@ } ], "source": [ - "source=\"Paul Merson has restarted his row with Andros Townsend after the Tottenham midfielder was brought on with only seven minutes remaining in his team 's 0-0 draw with Burnley on Sunday . 'Just been watching the game , did you miss the coach ? # RubberDub # 7minutes , ' Merson put on Twitter . Merson initially angered Townsend for writing in his Sky Sports column that 'if Andros Townsend can get in ( the England team ) then it opens it up to anybody . ' Paul Merson had another dig at Andros Townsend after his appearance for Tottenham against Burnley Townsend was brought on in the 83rd minute for Tottenham as they drew 0-0 against Burnley Andros Townsend scores England 's equaliser in their 1-1 friendly draw with Italy in Turin on Tuesday night The former Arsenal man was proven wrong when Townsend hit a stunning equaliser for England against Italy and he duly admitted his mistake . 'It 's not as though I was watching hoping he would n't score for England , I 'm genuinely pleased for him and fair play to him \\u00e2\\u20ac\\u201c it was a great goal , ' Merson said . 'It 's just a matter of opinion , and my opinion was that he got pulled off after half an hour at Manchester United in front of Roy Hodgson , so he should n't have been in the squad . 'When I 'm wrong , I hold my hands up . I do n't have a problem with doing that - I 'll always be the first to admit when I 'm wrong . 
' Townsend hit back at Merson on Twitter after scoring for England against Italy Sky Sports pundit Merson ( centre ) criticised Townsend 's call-up to the England squad last week Townsend hit back at Merson after netting for England in Turin on Wednesday , saying 'Not bad for a player that should be 'nowhere near the squad ' ay @ PaulMerse ? ' Any bad feeling between the pair seemed to have passed but Merson was unable to resist having another dig at Townsend after Tottenham drew at Turf Moor .\",\n", - "summary=\"Paul merson was brought on with only seven minutes remaining in his team 's 0-0 draw with burnley . Andros townsend scored the tottenham midfielder in the 89th minute . Paul merson had another dig at andros townsend after his appearance . The midfielder had been brought on to the england squad last week . Click here for all the latest arsenal news news .\",\n", + "source = (\n", + " \"Paul Merson has restarted his row with Andros Townsend after the Tottenham midfielder was brought on with only seven minutes remaining in his team 's 0-0 draw with Burnley on Sunday . 'Just been watching the game , did you miss the coach ? # RubberDub # 7minutes , ' Merson put on Twitter . Merson initially angered Townsend for writing in his Sky Sports column that 'if Andros Townsend can get in ( the England team ) then it opens it up to anybody . ' Paul Merson had another dig at Andros Townsend after his appearance for Tottenham against Burnley Townsend was brought on in the 83rd minute for Tottenham as they drew 0-0 against Burnley Andros Townsend scores England 's equaliser in their 1-1 friendly draw with Italy in Turin on Tuesday night The former Arsenal man was proven wrong when Townsend hit a stunning equaliser for England against Italy and he duly admitted his mistake . 'It 's not as though I was watching hoping he would n't score for England , I 'm genuinely pleased for him and fair play to him \\u00e2\\u20ac\\u201c it was a great goal , ' Merson said . 
'It 's just a matter of opinion , and my opinion was that he got pulled off after half an hour at Manchester United in front of Roy Hodgson , so he should n't have been in the squad . 'When I 'm wrong , I hold my hands up . I do n't have a problem with doing that - I 'll always be the first to admit when I 'm wrong . ' Townsend hit back at Merson on Twitter after scoring for England against Italy Sky Sports pundit Merson ( centre ) criticised Townsend 's call-up to the England squad last week Townsend hit back at Merson after netting for England in Turin on Wednesday , saying 'Not bad for a player that should be 'nowhere near the squad ' ay @ PaulMerse ? ' Any bad feeling between the pair seemed to have passed but Merson was unable to resist having another dig at Townsend after Tottenham drew at Turf Moor .\",\n", + ")\n", + "summary = (\n", + " \"Paul merson was brought on with only seven minutes remaining in his team 's 0-0 draw with burnley . Andros townsend scored the tottenham midfielder in the 89th minute . Paul merson had another dig at andros townsend after his appearance . The midfielder had been brought on to the england squad last week . 
Click here for all the latest arsenal news news .\",\n", + ")\n", "\n", "compute_g_eval_summarization(source=source, summary=summary)" ] diff --git a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb index ac7e3cbf..9308ea7f 100644 --- a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb +++ b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb @@ -137,12 +137,14 @@ "\n", "# Prompt user to enter their API keys securely\n", "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key, simplly press Enter if you don't have one: \")\n", + "groq_api_key = getpass(\n", + " \"Please enter your GROQ API key, simplly press Enter if you don't have one: \"\n", + ")\n", "\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -209,6 +211,7 @@ "\n", "\"\"\"\n", "\n", + "\n", "class ObjectCountTaskPipeline(adal.Component):\n", " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", " super().__init__()\n", @@ -242,9 +245,7 @@ " self, question: str, id: str = None\n", " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", " output = self.llm_counter(prompt_kwargs={\"input_str\": question}, id=id)\n", - " return output\n", - "\n", - "\n" + " return output" ] }, { @@ -329,44 +330,42 @@ "from adalflow.components.model_client.groq_client import GroqAPIClient\n", "\n", "\n", - "if len(os.environ['OPENAI_API_KEY']) > 1:\n", - " gpt_3_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"max_tokens\": 2000,\n", - " \"temperature\": 0.0,\n", - " \"top_p\": 0.99,\n", - " \"frequency_penalty\": 
0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": None,\n", - " },\n", - " }\n", - " gpt_4o_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-4o\",\n", - " \"max_tokens\": 4000,\n", - " \"temperature\": 0.0,\n", - " \"top_p\": 0.99,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": None,\n", - " },\n", - " }\n", + "if len(os.environ[\"OPENAI_API_KEY\"]) > 1:\n", + " gpt_3_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"max_tokens\": 2000,\n", + " \"temperature\": 0.0,\n", + " \"top_p\": 0.99,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": None,\n", + " },\n", + " }\n", + " gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-4o\",\n", + " \"max_tokens\": 4000,\n", + " \"temperature\": 0.0,\n", + " \"top_p\": 0.99,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": None,\n", + " },\n", + " }\n", "\n", - "if len(os.environ['GROQ_API_KEY']) > 1:\n", - " llama_3_1_model ={\n", - " \"model_client\": GroqAPIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"llama-3.1-8b-instant\"\n", - " }\n", - " }\n", + "if len(os.environ[\"GROQ_API_KEY\"]) > 1:\n", + " llama_3_1_model = {\n", + " \"model_client\": GroqAPIClient(),\n", + " \"model_kwargs\": {\"model\": \"llama-3.1-8b-instant\"},\n", + " }\n", "\n", "\n", "question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. 
How many musical instruments do I have?\"\n", "task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n", - "print(task_pipeline)\n" + "print(task_pipeline)" ] }, { @@ -467,6 +466,7 @@ "from adalflow.datasets.big_bench_hard import BigBenchHard\n", "from adalflow.utils.data import subset_dataset\n", "\n", + "\n", "def load_datasets(max_samples: int = None):\n", " \"\"\"Load the dataset\"\"\"\n", " train_data = BigBenchHard(split=\"train\")\n", @@ -479,7 +479,7 @@ " val_data = subset_dataset(val_data, max_samples)\n", " test_data = subset_dataset(test_data, max_samples)\n", "\n", - " return train_data, val_data, test_data\n" + " return train_data, val_data, test_data" ] }, { @@ -583,11 +583,11 @@ " def prepare_task(self, sample: Example):\n", " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", "\n", - " def prepare_eval(\n", - " self, sample: Example, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", + " def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:\n", " y_label = -1\n", - " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", + " if (\n", + " y_pred is not None and y_pred.data is not None\n", + " ): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", " y_label = y_pred.data\n", " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}" ] @@ -820,7 +820,7 @@ "from adalflow.datasets.types import Example\n", "\n", "\n", - "class ObjectCountAdalComponent(adal.AdalComponent):# noqa: F811\n", + "class ObjectCountAdalComponent(adal.AdalComponent): # noqa: F811\n", " def __init__(\n", " self,\n", " model_client: adal.ModelClient,\n", @@ -844,12 +844,11 @@ " def prepare_task(self, sample: Example):\n", " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", "\n", - "\n", - " def prepare_eval(\n", - " self, sample: Example, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", + " def 
prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:\n", " y_label = -1\n", - " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", + " if (\n", + " y_pred is not None and y_pred.data is not None\n", + " ): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", " y_label = y_pred.data\n", " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n", "\n", @@ -891,7 +890,7 @@ " **gpt_3_model,\n", " teacher_model_config=gpt_4o_model,\n", " text_optimizer_model_config=gpt_4o_model,\n", - " backward_engine_model_config=gpt_4o_model\n", + " backward_engine_model_config=gpt_4o_model,\n", " )\n", " print(adal_component)\n", " trainer = adal.Trainer(\n", @@ -916,7 +915,7 @@ " test_dataset=test_dataset,\n", " debug=debug,\n", " resume_from_ckpt=resume_from_ckpt,\n", - " )\n" + " )" ] }, { @@ -3255,10 +3254,14 @@ } ], "source": [ - "train(debug=False, max_steps=12, strategy=\"constrained\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " exclude_input_fields_from_bootstrap_demos=True\n", - " )" + "train(\n", + " debug=False,\n", + " max_steps=12,\n", + " strategy=\"constrained\",\n", + " raw_shots=0,\n", + " bootstrap_shots=1,\n", + " exclude_input_fields_from_bootstrap_demos=True,\n", + ")" ] }, { @@ -6015,13 +6018,17 @@ } ], "source": [ - "\n", "ckpt_path = \"/content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\"\n", "\n", - "train(debug=False, max_steps=12, strategy=\"constrained\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " resume_from_ckpt=ckpt_path,\n", - " exclude_input_fields_from_bootstrap_demos=True)" + "train(\n", + " debug=False,\n", + " max_steps=12,\n", + " strategy=\"constrained\",\n", + " raw_shots=0,\n", + " bootstrap_shots=1,\n", + " resume_from_ckpt=ckpt_path,\n", + " exclude_input_fields_from_bootstrap_demos=True,\n", + ")" ] }, { @@ -8038,11 +8045,15 @@ } ], "source": [ - "\n", - 
"train(debug=False, max_steps=12, strategy=\"random\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " resume_from_ckpt=ckpt_path,\n", - " exclude_input_fields_from_bootstrap_demos=False)" + "train(\n", + " debug=False,\n", + " max_steps=12,\n", + " strategy=\"random\",\n", + " raw_shots=0,\n", + " bootstrap_shots=1,\n", + " resume_from_ckpt=ckpt_path,\n", + " exclude_input_fields_from_bootstrap_demos=False,\n", + ")" ] }, { diff --git a/notebooks/tutorials/adalflow_classification_optimization.ipynb b/notebooks/tutorials/adalflow_classification_optimization.ipynb index 0afb97df..c6bddc7e 100644 --- a/notebooks/tutorials/adalflow_classification_optimization.ipynb +++ b/notebooks/tutorials/adalflow_classification_optimization.ipynb @@ -1,463 +1,461 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# ๐Ÿค— Welcome to AdalFlow!\n", - "## The PyTorch library to auto-optimize any LLM task pipelines\n", - "\n", - "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of ๐Ÿ˜Š any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! 
โญ Star us on Github โญ\n", - "\n", - "\n", - "# Quick Links\n", - "\n", - "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", - "\n", - "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", - "\n", - "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", - "\n", - "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", - "\n", - "## ๐Ÿ“– Outline\n", - "\n", - "This is the code for a classification optimization tutorial ![image.png]()\n" - ], - "metadata": { - "id": "xHF95Kr4CzGq" - } - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "# Installation\n", - "\n", - "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq` from the extra packages.\n", - "\n", - " ```bash\n", - " pip install adalflow[openai,groq]\n", - " ```\n", - "2. Setup `openai` and `groq` API key in the environment variables\n", - "\n", - "You can choose to use different client. You can import the model client you prefer. We support `Anthropic`, `Cohere`, `Google`, `GROQ`, `OpenAI`, `Transformer` and more in development. 
We will use OpenAI here as an example.Please refer to our [full installation guide](https://adalflow.sylph.ai/get_started/installation.html)" - ], - "metadata": { - "id": "Kof5M6DRaKhh" - } - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "id": "tAp3eDjOCma1" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai] # also install the package for the model client you'll use\n", - "!pip install datasets\n", - "clear_output()" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Set Environment Variables\n", - "\n", - "Run the following code and pass your api key.\n", - "\n", - "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n", - "\n", - "*Go to [OpenAI](https://platform.openai.com/docs/introduction) to get API keys if you don't already have.*" - ], - "metadata": { - "id": "KapUyHMM07pJ" - } - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "\n", - "print(\"API keys have been set.\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ONfzF9Puzdd_", - "outputId": "e5c3cfc5-69cb-448a-c248-a8cebda5ba71" - }, - "execution_count": 43, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", - "API keys have been set.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from dataclasses import dataclass, field\n", - "from typing import List, Dict, Union, Optional, Tuple, Any, Callable\n", - "from datasets import load_dataset\n", - "from 
adalflow.components.model_client import OpenAIClient\n", - "import adalflow as adal\n", - "from adalflow.core.component import Component\n", - "from adalflow.datasets.types import TrecData\n", - "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n", - "\n", - "\n", - "_COARSE_LABELS = [\n", - " \"ABBR\",\n", - " \"DESC\",\n", - " \"ENTY\",\n", - " \"HUM\",\n", - " \"LOC\",\n", - " \"NUM\"\n", - "]\n", - "\n", - "_COARSE_LABELS_DESC = [\n", - " \"Abbreviation: Questions about abbreviations and their meanings\",\n", - " \"Description: Questions seeking descriptions of people, things, or concepts\",\n", - " \"Entity: Questions about entities (e.g., animals, colors, inventions)\",\n", - " \"Human: Questions about people or organizations\",\n", - " \"Location: Questions about places, cities, countries\",\n", - " \"Numeric: Questions seeking numeric answers (e.g., dates, amounts, distances)\"\n", - "]\n", - "\n", - "\n", - "template = r\"\"\"\n", - " {{system_prompt}}\n", - " {% if output_format_str is not none %}\n", - " {{output_format_str}}\n", - " {% endif %}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " \"\"\"\n", - "\n", - "task_desc_template = r\"\"\"You are a classifier. Given a question, you need to classify it into one of the following classes:\n", - " Format: class_index. class_name, class_description\n", - " {% if classes %}\n", - " {% for class in classes %}\n", - " {{loop.index-1}}. 
{{class.label}}, {{class.desc}}\n", - " {% endfor %}\n", - " {% endif %}\n", - " - Do not try to answer the question:\n", - " \"\"\"\n", - "\n", - "@dataclass\n", - "class TRECExtendedData(TrecData):\n", - " rationale: str = field(\n", - " metadata={\n", - " \"desc\": \"Your step-by-step reasoning to classify the question to class_name\"\n", - " },\n", - " default=None,\n", - " )\n", - " __input_fields__ = [\"question\"]\n", - " __output_fields__ = [\"rationale\", \"class_name\"] # it is important to have the rationale before the class_name" - ], - "metadata": { - "id": "ZZIEtZYHNVjo" - }, - "execution_count": 49, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "class TRECClassifierStructuredOutput(adal.Component):\n", - "\n", - " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", - " super().__init__()\n", - "\n", - " label_desc = [\n", - " {\"label\": label, \"desc\": desc}\n", - " for label, desc in zip(_COARSE_LABELS, _COARSE_LABELS_DESC)\n", - " ]\n", - "\n", - " task_desc_str = adal.Prompt(\n", - " template=task_desc_template, prompt_kwargs={\"classes\": label_desc}\n", - " )()\n", - "\n", - " self.data_class = TRECExtendedData\n", - " self.data_class.set_task_desc(task_desc_str)\n", - "\n", - " self.parser = adal.DataClassParser(\n", - " data_class=self.data_class, return_data_class=True, format_type=\"yaml\"\n", - " )\n", - "\n", - " prompt_kwargs = {\n", - " \"system_prompt\": adal.Parameter(\n", - " data=self.parser.get_task_desc_str(),\n", - " role_desc=\"Task description\",\n", - " requires_opt=True,\n", - " param_type=adal.ParameterType.PROMPT,\n", - " ),\n", - " \"output_format_str\": adal.Parameter(\n", - " data=self.parser.get_output_format_str(),\n", - " role_desc=\"Output format requirements\",\n", - " requires_opt=False,\n", - " param_type=adal.ParameterType.PROMPT,\n", - " ),\n", - " \"few_shot_demos\": adal.Parameter(\n", - " data=None,\n", - " requires_opt=True,\n", - " role_desc=\"Few shot examples to 
help the model\",\n", - " param_type=adal.ParameterType.DEMOS,\n", - " ),\n", - " }\n", - "\n", - " self.llm = adal.Generator(\n", - " model_client=model_client,\n", - " model_kwargs=model_kwargs,\n", - " prompt_kwargs=prompt_kwargs,\n", - " template=template,\n", - " output_processors=self.parser,\n", - " use_cache=True,\n", - " )\n", - "\n", - " def _prepare_input(self, question: str):\n", - " input_data = self.data_class(question=question)\n", - " input_str = self.parser.get_input_str(input_data)\n", - " prompt_kwargs = {\n", - " \"input_str\": adal.Parameter(\n", - " data=input_str, requires_opt=False, role_desc=\"input to the LLM\"\n", - " )\n", - " }\n", - " return prompt_kwargs\n", - "\n", - " def call(\n", - " self, question: str, id: Optional[str] = None\n", - " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", - " prompt_kwargs = self._prepare_input(question)\n", - " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", - " return output" - ], - "metadata": { - "id": "3Q3H9XC4Ncfi" - }, - "execution_count": 50, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "class TrecClassifierAdal(adal.AdalComponent):\n", - " def __init__(\n", - " self,\n", - " model_client: adal.ModelClient,\n", - " model_kwargs: Dict,\n", - " teacher_model_config: Dict,\n", - " backward_engine_model_config: Dict,\n", - " text_optimizer_model_config: Dict,\n", - " ):\n", - " task = TRECClassifierStructuredOutput(model_client, model_kwargs)\n", - " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", - " loss_fn = adal.EvalFnToTextLoss(\n", - " eval_fn=eval_fn,\n", - " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n", - " )\n", - " super().__init__(\n", - " task=task,\n", - " eval_fn=eval_fn,\n", - " loss_fn=loss_fn,\n", - " backward_engine_model_config=backward_engine_model_config,\n", - " text_optimizer_model_config=text_optimizer_model_config,\n", - " teacher_model_config=teacher_model_config,\n", - " )\n", - "\n", - " def 
prepare_task(self, sample: TRECExtendedData):\n", - " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", - "\n", - " def prepare_eval(\n", - " self, sample: TRECExtendedData, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", - " y_label = -1\n", - " if y_pred and y_pred.data is not None and y_pred.data.class_name is not None:\n", - " y_label = y_pred.data.class_name\n", - " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.class_name}\n", - "\n", - " def prepare_loss(\n", - " self, sample: TRECExtendedData, y_pred: adal.Parameter, *args, **kwargs\n", - " ) -> Tuple[Callable[..., Any], Dict]:\n", - " full_response = y_pred.full_response\n", - " y_label = -1\n", - " if (\n", - " full_response\n", - " and full_response.data is not None\n", - " and full_response.data.class_name is not None\n", - " ):\n", - " y_label = full_response.data.class_name\n", - "\n", - " y_pred.eval_input = y_label\n", - " y_gt = adal.Parameter(\n", - " name=\"y_gt\",\n", - " data=sample.class_name,\n", - " eval_input=sample.class_name,\n", - " requires_opt=False,\n", - " )\n", - " return self.loss_fn, {\"kwargs\": {\"y\": y_pred, \"y_gt\": y_gt}}" - ], - "metadata": { - "id": "HpkQYsh2NevT" - }, - "execution_count": 51, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def train(\n", - " model_client: adal.ModelClient,\n", - " model_kwargs: Dict,\n", - " train_batch_size=4,\n", - " raw_shots: int = 0,\n", - " bootstrap_shots: int = 1,\n", - " max_steps=12,\n", - " num_workers=4,\n", - " strategy=\"constrained\",\n", - " optimization_order=\"sequential\",\n", - " debug=False,\n", - "):\n", - " print(\"Starting training process...\")\n", - "\n", - " # Define the model configuration for all components\n", - " gpt_4o_model = {\n", - " \"model\": \"gpt-4-turbo-preview\",\n", - " \"temperature\": 0,\n", - " \"max_tokens\": 1000,\n", - " \"top_p\": 1,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0\n", - " }\n", - " 
print(f\"Component model configuration: {gpt_4o_model}\")\n", - "\n", - " try:\n", - " print(\"Initializing ADAL component...\")\n", - " adal_component = TrecClassifierAdal(\n", - " model_client=model_client,\n", - " model_kwargs=model_kwargs,\n", - " text_optimizer_model_config=gpt_4o_model,\n", - " backward_engine_model_config=gpt_4o_model,\n", - " teacher_model_config=gpt_4o_model,\n", - " )\n", - " print(\"ADAL component initialized successfully\")\n", - "\n", - " print(\"Initializing trainer...\")\n", - " trainer = adal.Trainer(\n", - " train_batch_size=train_batch_size,\n", - " adaltask=adal_component,\n", - " strategy=strategy,\n", - " max_steps=max_steps,\n", - " num_workers=num_workers,\n", - " raw_shots=raw_shots,\n", - " bootstrap_shots=bootstrap_shots,\n", - " debug=debug,\n", - " weighted_sampling=True,\n", - " optimization_order=optimization_order,\n", - " exclude_input_fields_from_bootstrap_demos=True,\n", - " )\n", - " print(\"Trainer initialized successfully\")\n", - "\n", - " print(\"Loading datasets...\")\n", - " train_dataset, val_dataset, test_dataset = load_datasets()\n", - " print(f\"Datasets loaded - Train size: {len(train_dataset)}, Val size: {len(val_dataset)}, Test size: {len(test_dataset)}\")\n", - "\n", - " print(\"Starting model training...\")\n", - " trainer.fit(\n", - " train_dataset=train_dataset,\n", - " val_dataset=test_dataset,\n", - " debug=debug,\n", - " )\n", - " print(\"Training completed successfully\")\n", - "\n", - " except Exception as e:\n", - " print(f\"Error occurred: {str(e)}\")\n", - " raise" - ], - "metadata": { - "id": "PEj6xiZ5dVaj" - }, - "execution_count": 52, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from adalflow.components.model_client.openai_client import OpenAIClient\n", - "\n", - "\n", - "gpt_4o_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-4o-mini\",\n", - " \"max_tokens\": 2000,\n", - "\n", - " },\n", - "}\n", - "\n", - 
"\n", - "train(\n", - " model_client=OpenAIClient(),\n", - " model_kwargs=gpt_4o_model,\n", - " )" - ], - "metadata": { - "id": "GnlZBQOMEj6E", - "collapsed": true - }, - "execution_count": null, - "outputs": [] + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# ๐Ÿค— Welcome to AdalFlow!\n", + "## The PyTorch library to auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of ๐Ÿ˜Š any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! โญ Star us on Github โญ\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "## ๐Ÿ“– Outline\n", + "\n", + "This is the code for a classification optimization tutorial ![image.png]()\n" + ], + "metadata": { + "id": "xHF95Kr4CzGq" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. Setup `openai` and `groq` API key in the environment variables\n", + "\n", + "You can choose to use different client. You can import the model client you prefer. We support `Anthropic`, `Cohere`, `Google`, `GROQ`, `OpenAI`, `Transformer` and more in development. 
We will use OpenAI here as an example. Please refer to our [full installation guide](https://adalflow.sylph.ai/get_started/installation.html)" + ], + "metadata": { + "id": "Kof5M6DRaKhh" + } + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "id": "tAp3eDjOCma1" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai] # also install the package for the model client you'll use\n", + "!pip install datasets\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Set Environment Variables\n", + "\n", + "Run the following code and pass your api key.\n", + "\n", + "Note: for normal `.py` projects, follow our [official installation guide](https://adalflow.sylph.ai/get_started/installation.html).\n", + "\n", + "*Go to [OpenAI](https://platform.openai.com/docs/introduction) to get API keys if you don't already have.*" + ], + "metadata": { + "id": "KapUyHMM07pJ" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ONfzF9Puzdd_", + "outputId": "e5c3cfc5-69cb-448a-c248-a8cebda5ba71" + }, + "execution_count": 43, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "# Issues and feedback\n", - "\n", - "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", - "\n", - "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
- ], - "metadata": { - "id": "AmkbyxmuruUu" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", + "API keys have been set.\n" + ] } - ] + ] + }, + { + "cell_type": "code", + "source": [ + "from dataclasses import dataclass, field\n", + "from typing import List, Dict, Union, Optional, Tuple, Any, Callable\n", + "from datasets import load_dataset\n", + "from adalflow.components.model_client import OpenAIClient\n", + "import adalflow as adal\n", + "from adalflow.core.component import Component\n", + "from adalflow.datasets.types import TrecData\n", + "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n", + "\n", + "\n", + "_COARSE_LABELS = [\"ABBR\", \"DESC\", \"ENTY\", \"HUM\", \"LOC\", \"NUM\"]\n", + "\n", + "_COARSE_LABELS_DESC = [\n", + " \"Abbreviation: Questions about abbreviations and their meanings\",\n", + " \"Description: Questions seeking descriptions of people, things, or concepts\",\n", + " \"Entity: Questions about entities (e.g., animals, colors, inventions)\",\n", + " \"Human: Questions about people or organizations\",\n", + " \"Location: Questions about places, cities, countries\",\n", + " \"Numeric: Questions seeking numeric answers (e.g., dates, amounts, distances)\",\n", + "]\n", + "\n", + "\n", + "template = r\"\"\"\n", + " {{system_prompt}}\n", + " {% if output_format_str is not none %}\n", + " {{output_format_str}}\n", + " {% endif %}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " \"\"\"\n", + "\n", + "task_desc_template = r\"\"\"You are a classifier. Given a question, you need to classify it into one of the following classes:\n", + " Format: class_index. class_name, class_description\n", + " {% if classes %}\n", + " {% for class in classes %}\n", + " {{loop.index-1}}. 
{{class.label}}, {{class.desc}}\n", + " {% endfor %}\n", + " {% endif %}\n", + " - Do not try to answer the question:\n", + " \"\"\"\n", + "\n", + "\n", + "@dataclass\n", + "class TRECExtendedData(TrecData):\n", + " rationale: str = field(\n", + " metadata={\n", + " \"desc\": \"Your step-by-step reasoning to classify the question to class_name\"\n", + " },\n", + " default=None,\n", + " )\n", + " __input_fields__ = [\"question\"]\n", + " __output_fields__ = [\n", + " \"rationale\",\n", + " \"class_name\",\n", + " ] # it is important to have the rationale before the class_name" + ], + "metadata": { + "id": "ZZIEtZYHNVjo" + }, + "execution_count": 49, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "class TRECClassifierStructuredOutput(adal.Component):\n", + "\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", + " super().__init__()\n", + "\n", + " label_desc = [\n", + " {\"label\": label, \"desc\": desc}\n", + " for label, desc in zip(_COARSE_LABELS, _COARSE_LABELS_DESC)\n", + " ]\n", + "\n", + " task_desc_str = adal.Prompt(\n", + " template=task_desc_template, prompt_kwargs={\"classes\": label_desc}\n", + " )()\n", + "\n", + " self.data_class = TRECExtendedData\n", + " self.data_class.set_task_desc(task_desc_str)\n", + "\n", + " self.parser = adal.DataClassParser(\n", + " data_class=self.data_class, return_data_class=True, format_type=\"yaml\"\n", + " )\n", + "\n", + " prompt_kwargs = {\n", + " \"system_prompt\": adal.Parameter(\n", + " data=self.parser.get_task_desc_str(),\n", + " role_desc=\"Task description\",\n", + " requires_opt=True,\n", + " param_type=adal.ParameterType.PROMPT,\n", + " ),\n", + " \"output_format_str\": adal.Parameter(\n", + " data=self.parser.get_output_format_str(),\n", + " role_desc=\"Output format requirements\",\n", + " requires_opt=False,\n", + " param_type=adal.ParameterType.PROMPT,\n", + " ),\n", + " \"few_shot_demos\": adal.Parameter(\n", + " data=None,\n", + " requires_opt=True,\n", + 
" role_desc=\"Few shot examples to help the model\",\n", + " param_type=adal.ParameterType.DEMOS,\n", + " ),\n", + " }\n", + "\n", + " self.llm = adal.Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " prompt_kwargs=prompt_kwargs,\n", + " template=template,\n", + " output_processors=self.parser,\n", + " use_cache=True,\n", + " )\n", + "\n", + " def _prepare_input(self, question: str):\n", + " input_data = self.data_class(question=question)\n", + " input_str = self.parser.get_input_str(input_data)\n", + " prompt_kwargs = {\n", + " \"input_str\": adal.Parameter(\n", + " data=input_str, requires_opt=False, role_desc=\"input to the LLM\"\n", + " )\n", + " }\n", + " return prompt_kwargs\n", + "\n", + " def call(\n", + " self, question: str, id: Optional[str] = None\n", + " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", + " prompt_kwargs = self._prepare_input(question)\n", + " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", + " return output" + ], + "metadata": { + "id": "3Q3H9XC4Ncfi" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "class TrecClassifierAdal(adal.AdalComponent):\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " teacher_model_config: Dict,\n", + " backward_engine_model_config: Dict,\n", + " text_optimizer_model_config: Dict,\n", + " ):\n", + " task = TRECClassifierStructuredOutput(model_client, model_kwargs)\n", + " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", + " loss_fn = adal.EvalFnToTextLoss(\n", + " eval_fn=eval_fn,\n", + " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n", + " )\n", + " super().__init__(\n", + " task=task,\n", + " eval_fn=eval_fn,\n", + " loss_fn=loss_fn,\n", + " backward_engine_model_config=backward_engine_model_config,\n", + " text_optimizer_model_config=text_optimizer_model_config,\n", + " 
teacher_model_config=teacher_model_config,\n", + " )\n", + "\n", + " def prepare_task(self, sample: TRECExtendedData):\n", + " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", + "\n", + " def prepare_eval(\n", + " self, sample: TRECExtendedData, y_pred: adal.GeneratorOutput\n", + " ) -> float:\n", + " y_label = -1\n", + " if y_pred and y_pred.data is not None and y_pred.data.class_name is not None:\n", + " y_label = y_pred.data.class_name\n", + " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.class_name}\n", + "\n", + " def prepare_loss(\n", + " self, sample: TRECExtendedData, y_pred: adal.Parameter, *args, **kwargs\n", + " ) -> Tuple[Callable[..., Any], Dict]:\n", + " full_response = y_pred.full_response\n", + " y_label = -1\n", + " if (\n", + " full_response\n", + " and full_response.data is not None\n", + " and full_response.data.class_name is not None\n", + " ):\n", + " y_label = full_response.data.class_name\n", + "\n", + " y_pred.eval_input = y_label\n", + " y_gt = adal.Parameter(\n", + " name=\"y_gt\",\n", + " data=sample.class_name,\n", + " eval_input=sample.class_name,\n", + " requires_opt=False,\n", + " )\n", + " return self.loss_fn, {\"kwargs\": {\"y\": y_pred, \"y_gt\": y_gt}}" + ], + "metadata": { + "id": "HpkQYsh2NevT" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def train(\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " train_batch_size=4,\n", + " raw_shots: int = 0,\n", + " bootstrap_shots: int = 1,\n", + " max_steps=12,\n", + " num_workers=4,\n", + " strategy=\"constrained\",\n", + " optimization_order=\"sequential\",\n", + " debug=False,\n", + "):\n", + " print(\"Starting training process...\")\n", + "\n", + " # Define the model configuration for all components\n", + " gpt_4o_model = {\n", + " \"model\": \"gpt-4-turbo-preview\",\n", + " \"temperature\": 0,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1,\n", + " 
\"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " }\n", + " print(f\"Component model configuration: {gpt_4o_model}\")\n", + "\n", + " try:\n", + " print(\"Initializing ADAL component...\")\n", + " adal_component = TrecClassifierAdal(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " text_optimizer_model_config=gpt_4o_model,\n", + " backward_engine_model_config=gpt_4o_model,\n", + " teacher_model_config=gpt_4o_model,\n", + " )\n", + " print(\"ADAL component initialized successfully\")\n", + "\n", + " print(\"Initializing trainer...\")\n", + " trainer = adal.Trainer(\n", + " train_batch_size=train_batch_size,\n", + " adaltask=adal_component,\n", + " strategy=strategy,\n", + " max_steps=max_steps,\n", + " num_workers=num_workers,\n", + " raw_shots=raw_shots,\n", + " bootstrap_shots=bootstrap_shots,\n", + " debug=debug,\n", + " weighted_sampling=True,\n", + " optimization_order=optimization_order,\n", + " exclude_input_fields_from_bootstrap_demos=True,\n", + " )\n", + " print(\"Trainer initialized successfully\")\n", + "\n", + " print(\"Loading datasets...\")\n", + " train_dataset, val_dataset, test_dataset = load_datasets()\n", + " print(\n", + " f\"Datasets loaded - Train size: {len(train_dataset)}, Val size: {len(val_dataset)}, Test size: {len(test_dataset)}\"\n", + " )\n", + "\n", + " print(\"Starting model training...\")\n", + " trainer.fit(\n", + " train_dataset=train_dataset,\n", + " val_dataset=test_dataset,\n", + " debug=debug,\n", + " )\n", + " print(\"Training completed successfully\")\n", + "\n", + " except Exception as e:\n", + " print(f\"Error occurred: {str(e)}\")\n", + " raise" + ], + "metadata": { + "id": "PEj6xiZ5dVaj" + }, + "execution_count": 52, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.components.model_client.openai_client import OpenAIClient\n", + "\n", + "\n", + "gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " 
\"model\": \"gpt-4o-mini\",\n", + " \"max_tokens\": 2000,\n", + " },\n", + "}\n", + "\n", + "\n", + "train(\n", + " model_client=OpenAIClient(),\n", + " model_kwargs=gpt_4o_model,\n", + ")" + ], + "metadata": { + "id": "GnlZBQOMEj6E", + "collapsed": true + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." + ], + "metadata": { + "id": "AmkbyxmuruUu" + } + } + ] } diff --git a/notebooks/tutorials/adalflow_component.ipynb b/notebooks/tutorials/adalflow_component.ipynb index 2da8aa78..8523a629 100644 --- a/notebooks/tutorials/adalflow_component.ipynb +++ b/notebooks/tutorials/adalflow_component.ipynb @@ -1,985 +1,994 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ๐Ÿค— Welcome to AdalFlow!\n", - "## The library to build & auto-optimize any LLM task pipelines\n", - "\n", - "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of ๐Ÿ˜Š any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! 
⭐ Star us on Github ⭐\n", - "\n", - "\n", - "# Quick Links\n", - "\n", - "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", - "\n", - "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", - "\n", - "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", - "\n", - "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", - "\n", - "# Author\n", - "\n", - "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder).\n", - "\n", - "# Outline\n", - "\n", - "This is a quick introduction of what AdalFlow is capable of. We will cover:\n", - "\n", - "* How to use `DataClass` with `DataClassParser`.\n", - "* How to do nested dataclass, we will test both one and two levels of nesting.\n", - "\n", - "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", - "\n", - "\n", - "# Installation\n", - "\n", - "1. Use `pip` to install the `adalflow` Python package. We will need `openai` and `groq`from the extra packages.\n", - "\n", - " ```bash\n", - " pip install adalflow[openai,groq]\n", - " ```\n", - "2. 
Setup `openai` and `groq` API key in the environment variables" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "Ab_OmE6XTl4h" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,datasets]\n", - "\n", - "clear_output()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "PbAIsBeeTQUk" - }, - "outputs": [], - "source": [ - "import re\n", - "from adalflow.core import Component, Generator\n", - "from adalflow.components.model_client import OpenAIClient\n", - "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kRymwpwHTQUm", - "outputId": "6a992f52-1661-4002-ef74-ed26938c6baa" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", - "API keys have been set.\n" - ] - } - ], - "source": [ - "from getpass import getpass\n", - "import os\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "\n", - "print(\"API keys have been set.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "czGDvnVUTQUm" - }, - "outputs": [], - "source": [ - "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PPs3gHqeTQUn" - }, - "source": [ - "Let's turn on the library log to help with debugging." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "98QNsOcSTQUn", - "outputId": "d63cba1b-6087-4b04-bb2b-0a9d9d4500a5" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from adalflow.utils import get_logger\n", - "get_logger()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "b3ey1lozTQUo" - }, - "outputs": [], - "source": [ - "#Toy example\n", - "\n", - "class DocQA(Component):\n", - " def __init__(self):\n", - " super(DocQA, self).__init__()\n", - " self.doc = Generator(\n", - " template=template_doc,\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", - " )\n", - "\n", - " def call(self, query: str) -> str:\n", - " return self.doc(prompt_kwargs={\"input_str\": query}).data\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "TZAHSrbUTQUo", - "outputId": "66e81fb3-17f9-4570-dbbd-681cad1afc65" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:40:52 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n", - "2024-11-11 17:40:52 - generator - INFO - [generator.py:144:__init__] - Generator Generator initialized.\n" - ] - } - ], - "source": [ - "doc = DocQA()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "f-y6l44PTQUp", - "outputId": "e24aabd5-d758-4700-fa0d-46b66a88c412" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'type': 'DocQA', 'data': {'_components': {'_ordered_dict': True, 'data': [('doc', {'type': 'Generator', 'data': {'model_str': 
'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 
'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'DocQA', '_init_args': {}}}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'_components': OrderedDict([('doc',\n", - " Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " ))]),\n", - " '_parameters': OrderedDict(),\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# states\n", - "states = doc.to_dict()\n", - "print(states)\n", - "doc.__dict__" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🤗 Welcome to AdalFlow!\n", + "## The library to build & auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! 
⭐ Star us on Github ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "# Author\n", + "\n", + "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder).\n", + "\n", + "# Outline\n", + "\n", + "This is a quick introduction of what AdalFlow is capable of. We will cover:\n", + "\n", + "* How to use `DataClass` with `DataClassParser`.\n", + "* How to do nested dataclass, we will test both one and two levels of nesting.\n", + "\n", + "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", + "\n", + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai` and `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. 
Setup `openai` and `groq` API key in the environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Ab_OmE6XTl4h" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,datasets]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "PbAIsBeeTQUk" + }, + "outputs": [], + "source": [ + "import re\n", + "from adalflow.core import Component, Generator\n", + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.components.model_client import GroqAPIClient\n", + "from adalflow.utils import (\n", + " setup_env,\n", + ") # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "kRymwpwHTQUm", + "outputId": "6a992f52-1661-4002-ef74-ed26938c6baa" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "z_sH59_bTQUp" - }, - "source": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", + "API keys have been set.\n" + ] + } + ], + "source": [ + "from getpass import getpass\n", + "import os\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "czGDvnVUTQUm" + }, + "outputs": [], + "source": [ + "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PPs3gHqeTQUn" + }, + "source": [ + "Let's turn on the library log to help with debugging." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "98QNsOcSTQUn", + "outputId": "d63cba1b-6087-4b04-bb2b-0a9d9d4500a5" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "P81kIS2qTQUp", - "outputId": "d8e0e398-d704-4a85-8692-66a8c570b910" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Generator, {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 
'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}}\n", - "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Prompt, {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}\n", - "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n", - "2024-11-11 17:40:58 - component - 
INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'_components': OrderedDict([('doc',\n", - " Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " ))]),\n", - " '_parameters': OrderedDict(),\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# restore the states\n", - "doc2 = DocQA.from_dict(states)\n", - "# print(doc2.call(\"What is the capital of France?\"))\n", - "doc2.__dict__\n", - "# doc2.to_dict()" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from adalflow.utils import get_logger\n", + "\n", + "get_logger()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "b3ey1lozTQUo" + }, + "outputs": [], + "source": [ + "# Toy example\n", + "\n", + "\n", + "class DocQA(Component):\n", + " def __init__(self):\n", + " super(DocQA, self).__init__()\n", + " self.doc = Generator(\n", + " template=template_doc,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", + " )\n", + "\n", + " def call(self, query: str) -> str:\n", + " return 
self.doc(prompt_kwargs={\"input_str\": query}).data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "TZAHSrbUTQUo", + "outputId": "66e81fb3-17f9-4570-dbbd-681cad1afc65" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "198xYpLGTQUp", - "outputId": "ffd33d12-6db0-45c2-dfb1-3d57460ad4c9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'DocQA',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('doc',\n", - " {'type': 'Generator',\n", - " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", - " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", - " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", - " 'cache': ,\n", - " '_components': {'_ordered_dict': True,\n", - " 'data': [('prompt',\n", - " {'type': 'Prompt',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Prompt',\n", - " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_variables': ['input_str'],\n", - " 'prompt_kwargs': {}}}),\n", - " ('model_client',\n", - " {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}})]},\n", - " '_parameters': 
{'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Generator',\n", - " '_init_args': {'model_client': None,\n", - " 'model_kwargs': {},\n", - " 'template': None,\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " 'backward_engine': None,\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'output_processors': None,\n", - " 'mock_output': False,\n", - " 'mock_output_data': 'mock data',\n", - " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", - " '_use_cache': False,\n", - " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " '_teacher': None}})]},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc2.to_dict() == doc.to_dict()\n", - "doc2.to_dict()" - ] + "name": "stdout", + "output_type": 
"stream", + "text": [ + "2024-11-11 17:40:52 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n", + "2024-11-11 17:40:52 - generator - INFO - [generator.py:144:__init__] - Generator Generator initialized.\n" + ] + } + ], + "source": [ + "doc = DocQA()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "f-y6l44PTQUp", + "outputId": "e24aabd5-d758-4700-fa0d-46b66a88c412" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ulb1OWxxTQUq", - "outputId": "99972fcd-ed52-43b4-e461-a76c19bd9522" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:41:29 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", - "2024-11-11 17:41:30 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:41:30 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. 
In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', metadata=None)\n", - "As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.\n" - ] - } - ], - "source": [ - "print(doc(\"What is the best treatment for headache?\"))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{'type': 'DocQA', 'data': {'_components': {'_ordered_dict': True, 'data': [('doc', {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', 
'_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'DocQA', '_init_args': {}}}\n" + ] }, { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "POVal8CgTQUq", - "outputId": "2fadb1d6-b858-4964-9045-8ea7454178e3" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:41:35 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 
'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", - "2024-11-11 17:41:36 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:41:36 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', metadata=None)\n", - "As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.\n" - ] - } - ], - "source": [ - "print(doc2(\"What is the best treatment for headache?\"))" + "data": { + "text/plain": [ + "{'_components': OrderedDict([('doc',\n", + " Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " ))]),\n", + " '_parameters': OrderedDict(),\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}" ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# states\n", + "states = doc.to_dict()\n", + "print(states)\n", + "doc.__dict__" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z_sH59_bTQUp" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "P81kIS2qTQUp", + "outputId": "d8e0e398-d704-4a85-8692-66a8c570b910" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "R5gTO1-8TQUr" - }, - "source": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Generator, {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 
'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Prompt, {'type': 'Prompt', 'data': {'_components': 
{'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n" + ] }, { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jhgSpKrMTQUr", - "outputId": "15615bf7-2b72-4ac7-d1fe-f436a7304734" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('', DocQA(\n", - " (doc): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " )\n", - "))\n", - "('doc', Generator(\n", - " model_kwargs={'model': 
'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - "))\n", - "('doc.prompt', Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str']))\n", - "('doc.model_client', OpenAIClient())\n" - ] - } - ], - "source": [ - "# list other subcomponents\n", - "\n", - "for subcomponent in doc.named_components():\n", - " print(subcomponent)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XjIHAY6bTQUr" - }, - "source": [ - "Let's add a parameter" + "data": { + "text/plain": [ + "{'_components': OrderedDict([('doc',\n", + " Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " ))]),\n", + " '_parameters': OrderedDict(),\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}" ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# restore the states\n", + "doc2 = DocQA.from_dict(states)\n", + "# print(doc2.call(\"What is the capital of France?\"))\n", + "doc2.__dict__\n", + "# doc2.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "198xYpLGTQUp", + "outputId": "ffd33d12-6db0-45c2-dfb1-3d57460ad4c9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "id": "vxgjAUiFTQUr" - }, - "outputs": [], - "source": [ - "from adalflow.optim.parameter import Parameter\n", - "\n", - "doc.register_parameter(\"demo\", param=Parameter(data=\"demo\"))" + "data": { + "text/plain": [ + "{'type': 'DocQA',\n", + " 'data': {'_components': {'_ordered_dict': True,\n", 
+ " 'data': [('doc',\n", + " {'type': 'Generator',\n", + " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", + " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", + " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", + " 'cache': ,\n", + " '_components': {'_ordered_dict': True,\n", + " 'data': [('prompt',\n", + " {'type': 'Prompt',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Prompt',\n", + " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_variables': ['input_str'],\n", + " 'prompt_kwargs': {}}}),\n", + " ('model_client',\n", + " {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Generator',\n", + " '_init_args': {'model_client': None,\n", + " 'model_kwargs': {},\n", + " 'template': None,\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " 'backward_engine': None,\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'output_processors': None,\n", + " 'mock_output': 
False,\n", + " 'mock_output_data': 'mock data',\n", + " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", + " '_use_cache': False,\n", + " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " '_teacher': None}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}}" ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc2.to_dict() == doc.to_dict()\n", + "doc2.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Ulb1OWxxTQUq", + "outputId": "99972fcd-ed52-43b4-e461-a76c19bd9522" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "86C-h1e1TQUr", - "outputId": "57cab4d0-eddf-433d-e364-5d7f07072fbf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('demo', Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), 
gradients=[], raw_response=None, input_args=None, traces={}))\n" - ] - } - ], - "source": [ - "# list all parameters\n", - "for param in doc.named_parameters():\n", - " print(param)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:41:29 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", + "2024-11-11 17:41:30 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:41:30 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', metadata=None)\n", + "As a doctor, the best treatment for a headache depends on the underlying cause of the headache. 
In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.\n" + ] + } + ], + "source": [ + "print(doc(\"What is the best treatment for headache?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "POVal8CgTQUq", + "outputId": "2fadb1d6-b858-4964-9045-8ea7454178e3" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_s2MPukiTQUr", - "outputId": "b51c7d09-fb52-42d9-b2d5-4f44f5d22dc9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'DocQA',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('doc',\n", - " {'type': 'Generator',\n", - " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", - " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", - " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", - " 'cache': ,\n", - " '_components': {'_ordered_dict': True,\n", - " 'data': [('prompt',\n", - " {'type': 'Prompt',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Prompt',\n", - " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_variables': ['input_str'],\n", - " 'prompt_kwargs': {}}}),\n", - " ('model_client',\n", - " {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " 
'_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}})]},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Generator',\n", - " '_init_args': {'model_client': None,\n", - " 'model_kwargs': {},\n", - " 'template': None,\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " 'backward_engine': None,\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'output_processors': None,\n", - " 'mock_output': False,\n", - " 'mock_output_data': 'mock data',\n", - " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", - " '_use_cache': False,\n", - " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " '_teacher': None}})]},\n", - " 
'_parameters': {'_ordered_dict': True,\n", - " 'data': [('demo',\n", - " {'name': 'param_313f196d-3c48-4eb3-8138-b7bd74298fbd',\n", - " 'role_desc': '',\n", - " 'data': 'demo',\n", - " 'requires_opt': True,\n", - " 'param_type': 'none ()',\n", - " 'predecessors': [],\n", - " 'gradients': [],\n", - " 'previous_data': None,\n", - " 'gradients_context': [],\n", - " 'grad_fn': 'None',\n", - " 'gradient_prompt': 'None',\n", - " 'raw_response': None,\n", - " 'score': None,\n", - " 'traces': {},\n", - " 'input_args': None,\n", - " 'demos': []})]},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}}" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc.to_dict()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:41:35 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", + "2024-11-11 17:41:36 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:41:36 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', metadata=None)\n", + "As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.\n" + ] + } + ], + "source": [ + "print(doc2(\"What is the best treatment for headache?\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5gTO1-8TQUr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "jhgSpKrMTQUr", + "outputId": "15615bf7-2b72-4ac7-d1fe-f436a7304734" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "id": "mcIO1DuVTQUr" - }, - "outputs": [], - "source": [ - "from adalflow.utils.file_io import save_json\n", - "\n", - "save_json(doc.to_dict(), \"doc.json\")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "('', DocQA(\n", + " (doc): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " )\n", + "))\n", + "('doc', Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + "))\n", + "('doc.prompt', Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str']))\n", + "('doc.model_client', OpenAIClient())\n" + ] + } + ], + "source": [ + "# list other subcomponents\n", + "\n", + "for subcomponent in doc.named_components():\n", + " print(subcomponent)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XjIHAY6bTQUr" + }, + "source": [ + "Let's add a parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "vxgjAUiFTQUr" + }, + "outputs": [], + "source": [ + "from adalflow.optim.parameter import Parameter\n", + "\n", + 
"doc.register_parameter(\"demo\", param=Parameter(data=\"demo\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "86C-h1e1TQUr", + "outputId": "57cab4d0-eddf-433d-e364-5d7f07072fbf" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0vvO0nogTQUr", - "outputId": "59131d9e-a996-4c8b-f32c-9a6a623d3db6" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "OrderedDict([('demo',\n", - " Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))])" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc.state_dict()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "('demo', Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))\n" + ] + } + ], + "source": [ + "# list all parameters\n", + "for param in doc.named_parameters():\n", + " print(param)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "_s2MPukiTQUr", + "outputId": "b51c7d09-fb52-42d9-b2d5-4f44f5d22dc9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 125 - }, - "id": "uroqi93tTQUs", - "outputId": "8a3e4ecc-1368-475b-dc4d-2ff38821b8ac" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:42:18 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 
'content': ' You are a doctor User: What is the best treatment for a cold?'}]}\n", - "2024-11-11 17:42:19 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:42:19 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=85, prompt_tokens=28, total_tokens=113), raw_response='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', metadata=None)\n" - ] - }, - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. 
If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.'" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc.call(\"What is the best treatment for a cold?\")" + "data": { + "text/plain": [ + "{'type': 'DocQA',\n", + " 'data': {'_components': {'_ordered_dict': True,\n", + " 'data': [('doc',\n", + " {'type': 'Generator',\n", + " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", + " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", + " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", + " 'cache': ,\n", + " '_components': {'_ordered_dict': True,\n", + " 'data': [('prompt',\n", + " {'type': 'Prompt',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Prompt',\n", + " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_variables': ['input_str'],\n", + " 'prompt_kwargs': {}}}),\n", + " ('model_client',\n", + " {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Generator',\n", + " '_init_args': {'model_client': None,\n", + " 'model_kwargs': 
{},\n", + " 'template': None,\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " 'backward_engine': None,\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'output_processors': None,\n", + " 'mock_output': False,\n", + " 'mock_output_data': 'mock data',\n", + " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", + " '_use_cache': False,\n", + " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " '_teacher': None}})]},\n", + " '_parameters': {'_ordered_dict': True,\n", + " 'data': [('demo',\n", + " {'name': 'param_313f196d-3c48-4eb3-8138-b7bd74298fbd',\n", + " 'role_desc': '',\n", + " 'data': 'demo',\n", + " 'requires_opt': True,\n", + " 'param_type': 'none ()',\n", + " 'predecessors': [],\n", + " 'gradients': [],\n", + " 'previous_data': None,\n", + " 'gradients_context': [],\n", + " 'grad_fn': 'None',\n", + " 'gradient_prompt': 'None',\n", + " 'raw_response': None,\n", + " 'score': None,\n", + " 'traces': {},\n", + " 'input_args': None,\n", + " 'demos': []})]},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': 
False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}}" ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "mcIO1DuVTQUr" + }, + "outputs": [], + "source": [ + "from adalflow.utils.file_io import save_json\n", + "\n", + "save_json(doc.to_dict(), \"doc.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "0vvO0nogTQUr", + "outputId": "59131d9e-a996-4c8b-f32c-9a6a623d3db6" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mYSDr462TQUs", - "outputId": "82414c82-8feb-4667-90ed-91c594cc6a73" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "\n" - ] - } - ], - "source": [ - "from adalflow.core.component import FunComponent\n", - "\n", - "def add_one(x):\n", - " return x + 1\n", - "\n", - "fun_component = FunComponent(add_one)\n", - "print(fun_component(1))\n", - "print(type(fun_component))\n", - "\n", - "# output:\n", - "# 2\n", - "# " + "data": { + "text/plain": [ + "OrderedDict([('demo',\n", + " Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))])" ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.state_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 125 }, + "id": "uroqi93tTQUs", + "outputId": "8a3e4ecc-1368-475b-dc4d-2ff38821b8ac" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": { - "base_uri": 
"https://localhost:8080/" - }, - "id": "3MW1tpzRTQUs", - "outputId": "351b8922-1423-434a-f470-ff435a1962d2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "\n" - ] - } - ], - "source": [ - "from adalflow.core.component import fun_to_component\n", - "\n", - "fun_component = fun_to_component(add_one)\n", - "print(fun_component(1))\n", - "print(type(fun_component))\n", - "\n", - "# output:\n", - "# 2\n", - "# " - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:42:18 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for a cold?'}]}\n", + "2024-11-11 17:42:19 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:42:19 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=85, prompt_tokens=28, total_tokens=113), raw_response='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. 
If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', metadata=None)\n" + ] }, { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dxAoGrnQTQUs", - "outputId": "38c462a3-5abf-41f4-9231-746c8d0ffcb3" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "\n" - ] - } - ], - "source": [ - "# use it as a decorator\n", - "@fun_to_component\n", - "def add_one(x):\n", - " return x + 1\n", - "\n", - "print(add_one(1))\n", - "print(type(add_one))\n", - "\n", - "# output:\n", - "# 2\n", - "# " + "text/plain": [ + "'As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. 
If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.'" ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.call(\"What is the best treatment for a cold?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "mYSDr462TQUs", + "outputId": "82414c82-8feb-4667-90ed-91c594cc6a73" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7BvJEP_mTQUs", - "outputId": "066281b8-a650-4c48-c786-312022198015" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:42:39 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?Please be concise and only list the top treatments.'}]}\n", - "2024-11-11 17:42:40 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:42:40 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', error=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=37, total_tokens=74), raw_response='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', metadata=None)\n", - "The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and 
relaxation techniques such as deep breathing or meditation.\n" - ] - } - ], - "source": [ - "from adalflow.core import Sequential\n", - "\n", - "@fun_to_component\n", - "def enhance_query(query:str) -> str:\n", - " return query + \"Please be concise and only list the top treatments.\"\n", - "\n", - "seq = Sequential(enhance_query, doc)\n", - "\n", - "query = \"What is the best treatment for headache?\"\n", - "print(seq(query))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n" + ] + } + ], + "source": [ + "from adalflow.core.component import FunComponent\n", + "\n", + "\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "\n", + "fun_component = FunComponent(add_one)\n", + "print(fun_component(1))\n", + "print(type(fun_component))\n", + "\n", + "# output:\n", + "# 2\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "3MW1tpzRTQUs", + "outputId": "351b8922-1423-434a-f470-ff435a1962d2" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "aoZ2w8RUTQUt", - "outputId": "115d0ccf-33d1-4464-a951-cf9f5476284b" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Sequential(\n", - " (0): EnhanceQueryComponent(fun_name=enhance_query)\n", - " (1): DocQA(\n", - " (doc): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " )\n", - " )\n", - ")" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n" + ] + } + ], + "source": [ + "from adalflow.core.component import fun_to_component\n", + "\n", + "fun_component = 
fun_to_component(add_one)\n", + "print(fun_component(1))\n", + "print(type(fun_component))\n", + "\n", + "# output:\n", + "# 2\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "dxAoGrnQTQUs", + "outputId": "38c462a3-5abf-41f4-9231-746c8d0ffcb3" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "F-ffAlC6TQUt" - }, - "source": [ - "# TODO: LLM for single choices" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n" + ] + } + ], + "source": [ + "# use it as a decorator\n", + "@fun_to_component\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "\n", + "print(add_one(1))\n", + "print(type(add_one))\n", + "\n", + "# output:\n", + "# 2\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "7BvJEP_mTQUs", + "outputId": "066281b8-a650-4c48-c786-312022198015" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Issues and feedback\n", - "\n", - "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", - "\n", - "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
- ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:42:39 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?Please be concise and only list the top treatments.'}]}\n", + "2024-11-11 17:42:40 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:42:40 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', error=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=37, total_tokens=74), raw_response='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', metadata=None)\n", + "The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.\n" + ] } - ], - "metadata": { + ], + "source": [ + "from adalflow.core import Sequential\n", + "\n", + "\n", + "@fun_to_component\n", + "def enhance_query(query: str) -> str:\n", + " return query + \"Please be concise and only list the top treatments.\"\n", + "\n", + "\n", + "seq = Sequential(enhance_query, doc)\n", + "\n", + "query = \"What is the best treatment for headache?\"\n", + "print(seq(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "id": "aoZ2w8RUTQUt", + "outputId": "115d0ccf-33d1-4464-a951-cf9f5476284b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Sequential(\n", + " (0): EnhanceQueryComponent(fun_name=enhance_query)\n", + " (1): DocQA(\n", + " (doc): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + ")" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "seq" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-ffAlC6TQUt" + }, + "source": [ + "# TODO: LLM for single choices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
+ ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/notebooks/tutorials/adalflow_dataclasses.ipynb b/notebooks/tutorials/adalflow_dataclasses.ipynb index 3c96ffe5..7ae08f63 100644 --- a/notebooks/tutorials/adalflow_dataclasses.ipynb +++ b/notebooks/tutorials/adalflow_dataclasses.ipynb @@ -145,8 +145,8 @@ "\n", "\n", "# Set environment variables\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -231,12 +231,11 @@ " explanation: str = field(\n", " metadata={\"desc\": \"A brief explanation of the concept in one sentence.\"}\n", " )\n", - " example: str = field(\n", - " metadata={\"desc\": \"An example of the concept in a sentence.\"}\n", - " )\n", + " example: str = field(metadata={\"desc\": \"An example of the concept in a sentence.\"})\n", " # Control output fields order\n", " __output_fields__ = [\"explanation\", \"example\"]\n", "\n", + "\n", "# Define the template using jinja2 syntax\n", "qa_template = r\"\"\"\n", "You are a helpful assistant.\n", @@ -278,7 +277,7 @@ "\n", " async def acall(self, query: str):\n", " \"\"\"Asynchronous call to generate response\"\"\"\n", - " return await self.generator.acall({\"input_str\": query})\n" + " return await self.generator.acall({\"input_str\": query})" ] }, { @@ -380,26 +379,20 @@ "# 1. 
Basic DataClass with different field types\n", "@dataclass\n", "class MovieReview(adal.DataClass):\n", - " title: str = field(\n", - " metadata={\"desc\": \"The title of the movie\"}\n", - " )\n", + " title: str = field(metadata={\"desc\": \"The title of the movie\"})\n", " rating: float = field(\n", - " metadata={\n", - " \"desc\": \"Rating from 1.0 to 10.0\",\n", - " \"min\": 1.0,\n", - " \"max\": 10.0\n", - " }\n", + " metadata={\"desc\": \"Rating from 1.0 to 10.0\", \"min\": 1.0, \"max\": 10.0}\n", " )\n", " pros: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of positive points about the movie\"}\n", + " metadata={\"desc\": \"List of positive points about the movie\"},\n", " )\n", " cons: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of negative points about the movie\"}\n", + " metadata={\"desc\": \"List of negative points about the movie\"},\n", " )\n", "\n", - " __output_fields__ = [\"title\", \"rating\", \"pros\", \"cons\"]\n" + " __output_fields__ = [\"title\", \"rating\", \"pros\", \"cons\"]" ] }, { @@ -410,7 +403,6 @@ }, "outputs": [], "source": [ - "\n", "@dataclass\n", "class Actor(adal.DataClass):\n", " name: str = field(metadata={\"desc\": \"Actor's full name\"})\n", @@ -429,20 +421,18 @@ "\n", "# Have both MovieReview and Actor nested in DetailedMovieReview\n", "\n", + "\n", "@dataclass\n", "class DetailedMovieReview(adal.DataClass):\n", " basic_review: MovieReview\n", " cast: List[Actor] = field(\n", - " default_factory=list,\n", - " metadata={\"desc\": \"List of main actors in the movie\"}\n", + " default_factory=list, metadata={\"desc\": \"List of main actors in the movie\"}\n", " )\n", " genre: List[str] = field(\n", - " default_factory=list,\n", - " metadata={\"desc\": \"List of genres for the movie\"}\n", + " default_factory=list, metadata={\"desc\": \"List of genres for the movie\"}\n", " )\n", " recommend: bool = field(\n", - " default_factory=str,\n", - " 
metadata={\"desc\": \"Whether you would recommend this movie\"}\n", + " default_factory=str, metadata={\"desc\": \"Whether you would recommend this movie\"}\n", " )\n", "\n", " __output_fields__ = [\"basic_review\", \"cast\", \"genre\", \"recommend\"]" @@ -472,18 +462,25 @@ "source": [ "# Create the MovieReviewer component with MovieAnalysis data class\n", "class MovieReviewer(adal.Component):\n", - " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict, data_class: adal.DataClass):\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " data_class: adal.DataClass,\n", + " ):\n", " super().__init__()\n", - " self.additional_structure_prompt = \"Dont use 'type' and 'properties' in output directly give as dict\"\n", - " parser = adal.DataClassParser(\n", - " data_class=data_class,\n", - " return_data_class=True\n", + " self.additional_structure_prompt = (\n", + " \"Dont use 'type' and 'properties' in output directly give as dict\"\n", " )\n", + " parser = adal.DataClassParser(data_class=data_class, return_data_class=True)\n", " self.generator = adal.Generator(\n", " model_client=model_client,\n", " model_kwargs=model_kwargs,\n", " template=movie_review_template,\n", - " prompt_kwargs={\"output_format_str\": parser.get_output_format_str() + self.additional_structure_prompt},\n", + " prompt_kwargs={\n", + " \"output_format_str\": parser.get_output_format_str()\n", + " + self.additional_structure_prompt\n", + " },\n", " output_processors=parser,\n", " )\n", "\n", @@ -512,7 +509,7 @@ "reviewer = MovieReviewer(\n", " model_client=GroqAPIClient(),\n", " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", - " data_class=DetailedMovieReview\n", + " data_class=DetailedMovieReview,\n", ")\n", "\n", "response = reviewer(\"The Matrix\")\n", @@ -541,7 +538,7 @@ "reviewer = MovieReviewer(\n", " model_client=adal.OpenAIClient(),\n", " model_kwargs={\"model\": \"gpt-4o\"},\n", - " 
data_class=DetailedMovieReview\n", + " data_class=DetailedMovieReview,\n", ")\n", "response = reviewer(\"The Matrix\")\n", "print(f\"DetailedMovieReview: {response.data}\")\n", @@ -566,16 +563,16 @@ "source": [ "# 3. second level nested dataclass\n", "\n", + "\n", "@dataclass\n", "class MovieAnalysis(adal.DataClass):\n", " review: DetailedMovieReview\n", " box_office: float = field(\n", - " default=None,\n", - " metadata={\"desc\": \"Box office earnings in millions of dollars\"}\n", + " default=None, metadata={\"desc\": \"Box office earnings in millions of dollars\"}\n", " )\n", " awards: Dict[str, int] = field(\n", " default=None,\n", - " metadata={\"desc\": \"Dictionary of award categories and number of wins\"}\n", + " metadata={\"desc\": \"Dictionary of award categories and number of wins\"},\n", " )\n", "\n", " __output_fields__ = [\"review\", \"box_office\", \"awards\"]" @@ -605,7 +602,7 @@ "analysis = MovieReviewer(\n", " model_client=adal.OpenAIClient(),\n", " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", - " data_class=MovieAnalysis\n", + " data_class=MovieAnalysis,\n", ")\n", "\n", "response = analysis(\"The Matrix\")\n", @@ -637,7 +634,7 @@ "analysis = MovieReviewer(\n", " model_client=GroqAPIClient(),\n", " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", - " data_class=MovieAnalysis\n", + " data_class=MovieAnalysis,\n", ")\n", "\n", "response = analysis(\"The Matrix\")\n", @@ -668,33 +665,27 @@ "# 1. 
Basic DataClass with different field types\n", "@dataclass\n", "class SongReview(adal.DataClass):\n", - " title: str = field(\n", - " metadata={\"desc\": \"The title of the song\"}\n", - " )\n", - " album: str = field(\n", - " metadata={\"desc\": \"The album of the song\"}\n", - " )\n", + " title: str = field(metadata={\"desc\": \"The title of the song\"})\n", + " album: str = field(metadata={\"desc\": \"The album of the song\"})\n", " ranking: int = field(\n", - " metadata={\n", - " \"desc\": \"Billboard peak ranking from 1 to 200\",\n", - " \"min\": 1,\n", - " \"max\": 200\n", - " }\n", + " metadata={\"desc\": \"Billboard peak ranking from 1 to 200\", \"min\": 1, \"max\": 200}\n", " )\n", " streaming: Dict[str, int] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"Dict of lastest approximate streaming count in spotify and in youtube. Gives the count in millions\"}\n", + " metadata={\n", + " \"desc\": \"Dict of lastest approximate streaming count in spotify and in youtube. Gives the count in millions\"\n", + " },\n", " )\n", " pros: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of positive points about the song\"}\n", + " metadata={\"desc\": \"List of positive points about the song\"},\n", " )\n", " cons: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of negative points about the song\"}\n", + " metadata={\"desc\": \"List of negative points about the song\"},\n", " )\n", "\n", - " __output_fields__ = [\"title\", \"rating\", \"streaming\", \"pros\", \"cons\"]\n" + " __output_fields__ = [\"title\", \"rating\", \"streaming\", \"pros\", \"cons\"]" ] }, { @@ -705,7 +696,6 @@ }, "outputs": [], "source": [ - "\n", "@dataclass\n", "class Artist(adal.DataClass):\n", " name: str = field(metadata={\"desc\": \"Artist's full name\"})\n", @@ -722,6 +712,7 @@ "source": [ "# 2. 
Nested DataClass example\n", "\n", + "\n", "@dataclass\n", "class DetailedSongReview(adal.DataClass):\n", " basic_review: SongReview = field(\n", @@ -729,15 +720,13 @@ " )\n", " cast: List[Artist] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of main singer, lyrisist and musicians in the song\"}\n", + " metadata={\"desc\": \"List of main singer, lyrisist and musicians in the song\"},\n", " )\n", " genre: List[str] = field(\n", - " default_factory=list,\n", - " metadata={\"desc\": \"List of genres for the song\"}\n", + " default_factory=list, metadata={\"desc\": \"List of genres for the song\"}\n", " )\n", " recommend: bool = field(\n", - " default_factory=str,\n", - " metadata={\"desc\": \"Whether you would recommend this song\"}\n", + " default_factory=str, metadata={\"desc\": \"Whether you would recommend this song\"}\n", " )\n", "\n", " __output_fields__ = [\"basic_review\", \"cast\", \"genre\", \"recommend\"]" @@ -753,21 +742,19 @@ "source": [ "# 3. two levels of nesting dataclass\n", "\n", - "# all these fields as we use default, it is optional, so \n", + "# all these fields as we use default, it is optional, so\n", "# llm might not output that field if they dont have information\n", "\n", + "\n", "@dataclass\n", "class SongAnalysis(adal.DataClass):\n", " review: DetailedSongReview = field(\n", " default=DetailedSongReview, metadata={\"desc\": \"Song review details\"}\n", " )\n", - " duration: float = field(\n", - " default=None,\n", - " metadata={\"desc\": \"Duration of the song\"}\n", - " )\n", + " duration: float = field(default=None, metadata={\"desc\": \"Duration of the song\"})\n", " awards: Dict[str, int] = field(\n", " default=None,\n", - " metadata={\"desc\": \"Dictionary of award categories and number of wins\"}\n", + " metadata={\"desc\": \"Dictionary of award categories and number of wins\"},\n", " )\n", "\n", " __output_fields__ = [\"review\", \"duration\", \"awards\"]" @@ -788,7 +775,7 @@ "{{output_format_str}}\n", "\n", 
"\n", - " Review this song: {{song_title}} \"\"\"\n" + " Review this song: {{song_title}} \"\"\"" ] }, { @@ -803,17 +790,20 @@ "class SongReviewer(adal.Component):\n", " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", " super().__init__()\n", - " self.additional_structure_prompt = \"Dont use 'type' and 'properties' in output directly give as dict\"\n", + " self.additional_structure_prompt = (\n", + " \"Dont use 'type' and 'properties' in output directly give as dict\"\n", + " )\n", " parser = adal.DataClassParser(\n", - " data_class=SongAnalysis,\n", - " return_data_class=False,\n", - " format_type=\"json\"\n", + " data_class=SongAnalysis, return_data_class=False, format_type=\"json\"\n", " )\n", " self.generator = adal.Generator(\n", " model_client=model_client,\n", " model_kwargs=model_kwargs,\n", " template=song_review_template,\n", - " prompt_kwargs={\"output_format_str\": parser.get_output_format_str() + self.additional_structure_prompt },\n", + " prompt_kwargs={\n", + " \"output_format_str\": parser.get_output_format_str()\n", + " + self.additional_structure_prompt\n", + " },\n", " output_processors=parser,\n", " )\n", "\n", @@ -836,8 +826,8 @@ ], "source": [ "analysis = SongReviewer(\n", - " model_client=GroqAPIClient(),\n", - " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", ")\n", "\n", "response = analysis(\"Shape of you\")\n", @@ -886,27 +876,27 @@ "print(f\"Album: {analysis['review']['basic_review']['album']}\")\n", "print(f\"Ranking: {analysis['review']['basic_review']['ranking']}\")\n", "\n", - "for platform, views in analysis['review']['basic_review']['streaming'].items():\n", + "for platform, views in analysis[\"review\"][\"basic_review\"][\"streaming\"].items():\n", " print(f\"- {platform} - {views} million views\")\n", "print(\"\\nPros:\")\n", - "for pro in analysis['review'][\"basic_review\"][\"pros\"]:\n", + "for pro in 
analysis[\"review\"][\"basic_review\"][\"pros\"]:\n", " print(f\"- {pro}\")\n", "\n", "print(\"\\nArtist's:\")\n", - "for actor in analysis['review'][\"cast\"]:\n", - " print(f\"- {actor['name']} as {actor['role']}\")\n", + "for actor in analysis[\"review\"][\"cast\"]:\n", + " print(f\"- {actor['name']} as {actor['role']}\")\n", "\n", - "if analysis['review']['genre']:\n", + "if analysis[\"review\"][\"genre\"]:\n", " print(\"\\nGenere: \")\n", - " for genre in analysis['review']['genre']:\n", + " for genre in analysis[\"review\"][\"genre\"]:\n", " print(f\" {genre} \")\n", "\n", - "if analysis['duration']:\n", + "if analysis[\"duration\"]:\n", " print(f\"\\nDuration: {analysis['duration']} minutes\")\n", "\n", - "if hasattr(analysis, 'awards') and analysis['awards']:\n", + "if hasattr(analysis, \"awards\") and analysis[\"awards\"]:\n", " print(\"\\nAwards:\")\n", - " for category, count in analysis['awards'].items():\n", + " for category, count in analysis[\"awards\"].items():\n", " print(f\"- {category}: {count}\")" ] }, diff --git a/notebooks/tutorials/adalflow_function_calls.ipynb b/notebooks/tutorials/adalflow_function_calls.ipynb index 622448c9..6fba3594 100644 --- a/notebooks/tutorials/adalflow_function_calls.ipynb +++ b/notebooks/tutorials/adalflow_function_calls.ipynb @@ -1,737 +1,736 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Function calls\n", + "\n", + "Tools are means LLM can use to interact with the world beyond of its internal knowledge. Technically speaking, retrievers are tools to help LLM to get more relevant context, and memory is a tool for LLM to carry out a conversation. 
Deciding when, which, and how to use a tool, and even to creating a tool is an agentic behavior: Function calls is a process of showing LLM a list of funciton definitions and prompt it to choose one or few of them. Many places use tools and function calls interchangably.\n", + "\n", + "In this notebook we will covert function calls, including:\n", + "\n", + "- Function call walkthrough\n", + "\n", + "- Overall design\n", + "\n", + "- Function call in action\n", + "\n", + "It follows the tutorial here: https://adalflow.sylph.ai/tutorials/tool_helper.html#" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "id": "-4c_AGBt3PlR", + "outputId": "21a26437-9f95-4478-84e9-ba4369956b6f" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", + "Please enter your GROQ API key: ยทยทยทยทยทยทยทยทยทยท\n", + "API keys have been set.\n" + ] } + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# 
Function calls\n", - "\n", - "Tools are means LLM can use to interact with the world beyond of its internal knowledge. Technically speaking, retrievers are tools to help LLM to get more relevant context, and memory is a tool for LLM to carry out a conversation. Deciding when, which, and how to use a tool, and even to creating a tool is an agentic behavior: Function calls is a process of showing LLM a list of funciton definitions and prompt it to choose one or few of them. Many places use tools and function calls interchangably.\n", - "\n", - "In this notebook we will covert function calls, including:\n", - "\n", - "- Function call walkthrough\n", - "\n", - "- Overall design\n", - "\n", - "- Function call in action\n", - "\n", - "It follows the tutorial here: https://adalflow.sylph.ai/tutorials/tool_helper.html#" - ], - "metadata": { - "id": "lLGpv1fLLIjF" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "sfKEfaYC3Go7" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,faiss-cpu]\n", - "\n", - "clear_output()\n" - ] - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - "\n", - "print(\"API keys have been set.\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-4c_AGBt3PlR", - "outputId": "21a26437-9f95-4478-84e9-ba4369956b6f" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", - "Please enter your GROQ API 
key: ยทยทยทยทยทยทยทยทยทยท\n", - "API keys have been set.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from dataclasses import dataclass\n", - "from typing import List\n", - "import numpy as np\n", - "import time\n", - "import asyncio\n", - "\n", - "\n", - "\n", - "def multiply(a: int, b: int) -> int:\n", - " \"\"\"Multiply two numbers.\"\"\"\n", - " time.sleep(1)\n", - " return a * b\n", - "\n", - "\n", - "def add(a: int, b: int) -> int:\n", - " \"\"\"Add two numbers.\"\"\"\n", - " time.sleep(1)\n", - " return a + b\n", - "\n", - "\n", - "async def divide(a: float, b: float) -> float:\n", - " \"\"\"Divide two numbers.\"\"\"\n", - " await asyncio.sleep(1)\n", - " return float(a) / b\n", - "\n", - "\n", - "async def search(query: str) -> List[str]:\n", - " \"\"\"Search for query and return a list of results.\"\"\"\n", - " await asyncio.sleep(1)\n", - " return [\"result1\" + query, \"result2\" + query]\n", - "\n", - "\n", - "def numpy_sum(arr: np.ndarray) -> float:\n", - " \"\"\"Sum the elements of an array.\"\"\"\n", - " return np.sum(arr)\n", - "\n", - "\n", - "x = 2\n", - "\n", - "@dataclass\n", - "class Point:\n", - " x: int\n", - " y: int\n", - "\n", - "\n", - "def add_points(p1: Point, p2: Point) -> Point:\n", - " return Point(p1.x + p2.x, p1.y + p2.y)" - ], - "metadata": { - "id": "GMKuuP7xR9Nt" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "##ย Function Tool" - ], - "metadata": { - "id": "jCA7HMjtT16P" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.core.func_tool import FunctionTool\n", - "\n", - "functions =[multiply, add, divide, search, numpy_sum, add_points]\n", - "tools = [\n", - " FunctionTool(fn=fn) for fn in functions\n", - "]\n", - "for tool in tools:\n", - " print(tool)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fgOEoLoDSBqh", - "outputId": "7e636e2c-9a5d-44f1-f0fe-fe8a6bea474d" - }, - "execution_count": 5, - 
"outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']}))\n", - "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']}))\n", - "FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']}))\n", - "FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']}))\n", - "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']}))\n", - "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))\n" - ] - } - ] 
- }, - { - "cell_type": "code", - "source": [ - "print(tools[-2].definition.to_dict())" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "CYJaHFhGSEzH", - "outputId": "9ab36c6c-7509-4e7f-ce85-11dae889c8c2" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'func_name': 'numpy_sum', 'func_desc': 'numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', 'func_parameters': {'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']}}\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "context_map = {tool.definition.func_name: tool for tool in tools}" - ], - "metadata": { - "id": "_O4bQgXrSKb6" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "function_name = \"add\"\n", - "function_to_call = context_map[function_name]\n", - "function_args = {\"a\": 1, \"b\": 2}\n", - "function_response = function_to_call.call(**function_args)" - ], - "metadata": { - "id": "-RgWWMdISL1u" - }, - "execution_count": 8, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from adalflow.core.tool_manager import ToolManager\n", - "\n", - "tool_manager = ToolManager(tools=functions)\n", - "print(tool_manager)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6CT7Tez1SOai", - "outputId": "e486d882-9179-4db3-f077-6adfc9fc6579" - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two 
numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))], Additional Context: {})\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## ToolManager" - ], - "metadata": { - "id": "jzFqNnN_T-cu" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.core.tool_manager import ToolManager\n", - "\n", - "tool_manager = ToolManager(tools=functions)\n", - "print(tool_manager)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JX7MibWiUF3U", - "outputId": "20707186-5ec3-49a4-d553-c3160c3daa84" - }, - "execution_count": 10, 
- "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))], Additional Context: 
{})\n" - ] - } - ] + { + "cell_type": "code", + "source": [ + "from dataclasses import dataclass\n", + "from typing import List\n", + "import numpy as np\n", + "import time\n", + "import asyncio\n", + "\n", + "\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiply two numbers.\"\"\"\n", + " time.sleep(1)\n", + " return a * b\n", + "\n", + "\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Add two numbers.\"\"\"\n", + " time.sleep(1)\n", + " return a + b\n", + "\n", + "\n", + "async def divide(a: float, b: float) -> float:\n", + " \"\"\"Divide two numbers.\"\"\"\n", + " await asyncio.sleep(1)\n", + " return float(a) / b\n", + "\n", + "\n", + "async def search(query: str) -> List[str]:\n", + " \"\"\"Search for query and return a list of results.\"\"\"\n", + " await asyncio.sleep(1)\n", + " return [\"result1\" + query, \"result2\" + query]\n", + "\n", + "\n", + "def numpy_sum(arr: np.ndarray) -> float:\n", + " \"\"\"Sum the elements of an array.\"\"\"\n", + " return np.sum(arr)\n", + "\n", + "\n", + "x = 2\n", + "\n", + "\n", + "@dataclass\n", + "class Point:\n", + " x: int\n", + " y: int\n", + "\n", + "\n", + "def add_points(p1: Point, p2: Point) -> Point:\n", + " return Point(p1.x + p2.x, p1.y + p2.y)" + ], + "metadata": { + "id": "GMKuuP7xR9Nt" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##ย Function Tool" + ], + "metadata": { + "id": "jCA7HMjtT16P" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.func_tool import FunctionTool\n", + "\n", + "functions = [multiply, add, divide, search, numpy_sum, add_points]\n", + "tools = [FunctionTool(fn=fn) for fn in functions]\n", + "for tool in tools:\n", + " print(tool)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "fgOEoLoDSBqh", + "outputId": "7e636e2c-9a5d-44f1-f0fe-fe8a6bea474d" + }, + "execution_count": 5, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "## Function Call 
end-to-end" - ], - "metadata": { - "id": "9Bw2fs--UKX7" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']}))\n", + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']}))\n", + "FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']}))\n", + "FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']}))\n", + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']}))\n", + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 
'required': ['p1', 'p2']}))\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(tools[-2].definition.to_dict())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "CYJaHFhGSEzH", + "outputId": "9ab36c6c-7509-4e7f-ce85-11dae889c8c2" + }, + "execution_count": 6, + "outputs": [ { - "cell_type": "code", - "source": [ - "template = r\"\"\"You have these tools available:\n", - "{% if tools %}\n", - "\n", - "{% for tool in tools %}\n", - "{{ loop.index }}.\n", - "{{tool}}\n", - "------------------------\n", - "{% endfor %}\n", - "\n", - "{% endif %}\n", - "\n", - "{{output_format_str}}\n", - "\n", - "\n", - "User: {{input_str}}\n", - "You:\n", - "\"\"\"" - ], - "metadata": { - "id": "TywPQMIVUOqh" - }, - "execution_count": 11, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "{'func_name': 'numpy_sum', 'func_desc': 'numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', 'func_parameters': {'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']}}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "context_map = {tool.definition.func_name: tool for tool in tools}" + ], + "metadata": { + "id": "_O4bQgXrSKb6" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "function_name = \"add\"\n", + "function_to_call = context_map[function_name]\n", + "function_args = {\"a\": 1, \"b\": 2}\n", + "function_response = function_to_call.call(**function_args)" + ], + "metadata": { + "id": "-RgWWMdISL1u" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.tool_manager import ToolManager\n", + "\n", + "tool_manager = ToolManager(tools=functions)\n", + "print(tool_manager)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "6CT7Tez1SOai", + "outputId": "e486d882-9179-4db3-f077-6adfc9fc6579" + }, + "execution_count": 9, + 
"outputs": [ { - "cell_type": "code", - "source": [ - "from adalflow.core.prompt_builder import Prompt\n", - "\n", - "prompt = Prompt(template=template)\n", - "small_tool_manager = ToolManager(tools=tools[:2])\n", - "\n", - "renered_prompt = prompt(tools=small_tool_manager.yaml_definitions)\n", - "print(renered_prompt)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-vMajeXoUQ5A", - "outputId": "ca68601b-e9c8-41c3-a6fa-777f225e68e3" - }, - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "You have these tools available:\n", - "\n", - "1.\n", - "func_name: multiply\n", - "func_desc: 'multiply(a: int, b: int) -> int\n", - "\n", - " Multiply two numbers.'\n", - "func_parameters:\n", - " type: object\n", - " properties:\n", - " a:\n", - " type: int\n", - " b:\n", - " type: int\n", - " required:\n", - " - a\n", - " - b\n", - "------------------------\n", - "2.\n", - "func_name: add\n", - "func_desc: 'add(a: int, b: int) -> int\n", - "\n", - " Add two numbers.'\n", - "func_parameters:\n", - " type: object\n", - " properties:\n", - " a:\n", - " type: int\n", - " b:\n", - " type: int\n", - " required:\n", - " - a\n", - " - b\n", - "------------------------\n", - "\n", - "\n", - "None\n", - "\n", - "\n", - "User: None\n", - "You:\n", - "\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , 
async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))], Additional Context: {})\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## ToolManager" + ], + "metadata": { + "id": "jzFqNnN_T-cu" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.tool_manager import ToolManager\n", + "\n", + "tool_manager = ToolManager(tools=functions)\n", + "print(tool_manager)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "JX7MibWiUF3U", + "outputId": "20707186-5ec3-49a4-d553-c3160c3daa84" + }, + "execution_count": 10, + "outputs": [ { - "cell_type": "code", - "source": [ - "from adalflow.core.types import Function\n", - "\n", - "output_data_class = Function\n", - 
"output_format_str = output_data_class.to_json_signature(exclude=[\"thought\", \"args\"])\n", - "\n", - "renered_prompt= prompt(output_format_str=output_format_str)\n", - "print(renered_prompt)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "V9-90IFRUUNT", - "outputId": "ed2f829e-c656-43c6-a454-8a7c32d5dafe" - }, - "execution_count": 13, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "You have these tools available:\n", - "\n", - "{\n", - " \"name\": \"The name of the function (str) (optional)\",\n", - " \"kwargs\": \"The keyword arguments of the function (Optional[Dict[str, object]]) (optional)\"\n", - "}\n", - "\n", - "\n", - "User: None\n", - "You:\n", - "\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']})), FunctionTool(fn: , async: False, definition: 
FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))], Additional Context: {})\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Function Call end-to-end" + ], + "metadata": { + "id": "9Bw2fs--UKX7" + } + }, + { + "cell_type": "code", + "source": [ + "template = r\"\"\"You have these tools available:\n", + "{% if tools %}\n", + "\n", + "{% for tool in tools %}\n", + "{{ loop.index }}.\n", + "{{tool}}\n", + "------------------------\n", + "{% endfor %}\n", + "\n", + "{% endif %}\n", + "\n", + "{{output_format_str}}\n", + "\n", + "\n", + "User: {{input_str}}\n", + "You:\n", + "\"\"\"" + ], + "metadata": { + "id": "TywPQMIVUOqh" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.prompt_builder import Prompt\n", + "\n", + "prompt = Prompt(template=template)\n", + "small_tool_manager = ToolManager(tools=tools[:2])\n", + "\n", + "renered_prompt = prompt(tools=small_tool_manager.yaml_definitions)\n", + "print(renered_prompt)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "-vMajeXoUQ5A", + "outputId": "ca68601b-e9c8-41c3-a6fa-777f225e68e3" + }, + "execution_count": 12, + "outputs": [ { - "cell_type": "code", - "source": [ - "from adalflow.core.types import FunctionExpression\n", - "\n", 
- "output_data_class = FunctionExpression\n", - "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\"])\n", - "print(prompt(output_format_str=output_format_str))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "p3kPMhWaUYT1", - "outputId": "a3de7117-c3eb-404e-e2e7-8a5187b32f6b" - }, - "execution_count": 14, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "You have these tools available:\n", - "\n", - "{\n", - " \"action\": \"FuncName() Valid function call expression. Example: \\\"FuncName(a=1, b=2)\\\" Follow the data type specified in the function parameters.e.g. for Type object with x,y properties, use \\\"ObjectType(x=1, y=2) (str) (required)\"\n", - "}\n", - "\n", - "\n", - "User: None\n", - "You:\n", - "\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "You have these tools available:\n", + "\n", + "1.\n", + "func_name: multiply\n", + "func_desc: 'multiply(a: int, b: int) -> int\n", + "\n", + " Multiply two numbers.'\n", + "func_parameters:\n", + " type: object\n", + " properties:\n", + " a:\n", + " type: int\n", + " b:\n", + " type: int\n", + " required:\n", + " - a\n", + " - b\n", + "------------------------\n", + "2.\n", + "func_name: add\n", + "func_desc: 'add(a: int, b: int) -> int\n", + "\n", + " Add two numbers.'\n", + "func_parameters:\n", + " type: object\n", + " properties:\n", + " a:\n", + " type: int\n", + " b:\n", + " type: int\n", + " required:\n", + " - a\n", + " - b\n", + "------------------------\n", + "\n", + "\n", + "None\n", + "\n", + "\n", + "User: None\n", + "You:\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.types import Function\n", + "\n", + "output_data_class = Function\n", + "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\", \"args\"])\n", + "\n", + "renered_prompt = prompt(output_format_str=output_format_str)\n", + "print(renered_prompt)" + 
], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "V9-90IFRUUNT", + "outputId": "ed2f829e-c656-43c6-a454-8a7c32d5dafe" + }, + "execution_count": 13, + "outputs": [ { - "cell_type": "code", - "source": [ - "from adalflow.components.output_parsers import JsonOutputParser\n", - "\n", - "func_parser = JsonOutputParser(data_class=Function, exclude_fields=[\"thought\", \"args\"])\n", - "instructions = func_parser.format_instructions()\n", - "print(instructions)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MvGyoUmMUatR", - "outputId": "e819866b-f6e3-4c88-f9f1-22d725a28865" - }, - "execution_count": 17, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Your output should be formatted as a standard JSON instance with the following schema:\n", - "```\n", - "{\n", - " \"name\": \"The name of the function (str) (optional)\",\n", - " \"kwargs\": \"The keyword arguments of the function (Optional[Dict[str, object]]) (optional)\"\n", - "}\n", - "```\n", - "-Make sure to always enclose the JSON output in triple backticks (```). 
Please do not add anything other than valid JSON output!\n", - "-Use double quotes for the keys and string values.\n", - "-DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\n", - "-Follow the JSON formatting conventions.\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "You have these tools available:\n", + "\n", + "{\n", + " \"name\": \"The name of the function (str) (optional)\",\n", + " \"kwargs\": \"The keyword arguments of the function (Optional[Dict[str, object]]) (optional)\"\n", + "}\n", + "\n", + "\n", + "User: None\n", + "You:\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.types import FunctionExpression\n", + "\n", + "output_data_class = FunctionExpression\n", + "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\"])\n", + "print(prompt(output_format_str=output_format_str))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "p3kPMhWaUYT1", + "outputId": "a3de7117-c3eb-404e-e2e7-8a5187b32f6b" + }, + "execution_count": 14, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "## Function Output Format" - ], - "metadata": { - "id": "9W7DiGcpUme5" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "You have these tools available:\n", + "\n", + "{\n", + " \"action\": \"FuncName() Valid function call expression. Example: \\\"FuncName(a=1, b=2)\\\" Follow the data type specified in the function parameters.e.g. 
for Type object with x,y properties, use \\\"ObjectType(x=1, y=2) (str) (required)\"\n", + "}\n", + "\n", + "\n", + "User: None\n", + "You:\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.components.output_parsers import JsonOutputParser\n", + "\n", + "func_parser = JsonOutputParser(data_class=Function, exclude_fields=[\"thought\", \"args\"])\n", + "instructions = func_parser.format_instructions()\n", + "print(instructions)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "MvGyoUmMUatR", + "outputId": "e819866b-f6e3-4c88-f9f1-22d725a28865" + }, + "execution_count": 17, + "outputs": [ { - "cell_type": "code", - "source": [ - "from adalflow.core.generator import Generator\n", - "from adalflow.core.types import ModelClientType\n", - "\n", - "model_kwargs = {\"model\": \"gpt-4o-mini\"}\n", - "prompt_kwargs = {\n", - " \"tools\": tool_manager.yaml_definitions,\n", - " \"output_format_str\": func_parser.format_instructions(),\n", - "}\n", - "generator = Generator(\n", - " model_client=ModelClientType.OPENAI(),\n", - " model_kwargs=model_kwargs,\n", - " template=template,\n", - " prompt_kwargs=prompt_kwargs,\n", - " output_processors=func_parser,\n", - ")" - ], - "metadata": { - "id": "z5tNhoruUp6o" - }, - "execution_count": 20, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Your output should be formatted as a standard JSON instance with the following schema:\n", + "```\n", + "{\n", + " \"name\": \"The name of the function (str) (optional)\",\n", + " \"kwargs\": \"The keyword arguments of the function (Optional[Dict[str, object]]) (optional)\"\n", + "}\n", + "```\n", + "-Make sure to always enclose the JSON output in triple backticks (```). 
Please do not add anything other than valid JSON output!\n", + "-Use double quotes for the keys and string values.\n", + "-DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\n", + "-Follow the JSON formatting conventions.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Function Output Format" + ], + "metadata": { + "id": "9W7DiGcpUme5" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.generator import Generator\n", + "from adalflow.core.types import ModelClientType\n", + "\n", + "model_kwargs = {\"model\": \"gpt-4o-mini\"}\n", + "prompt_kwargs = {\n", + " \"tools\": tool_manager.yaml_definitions,\n", + " \"output_format_str\": func_parser.format_instructions(),\n", + "}\n", + "generator = Generator(\n", + " model_client=ModelClientType.OPENAI(),\n", + " model_kwargs=model_kwargs,\n", + " template=template,\n", + " prompt_kwargs=prompt_kwargs,\n", + " output_processors=func_parser,\n", + ")" + ], + "metadata": { + "id": "z5tNhoruUp6o" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "queries = [\n", + " \"add 2 and 3\",\n", + " \"search for something\",\n", + " \"add points (1, 2) and (3, 4)\",\n", + " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", + " \"multiply 2 with local variable x\",\n", + " \"divide 2 by 3\",\n", + " \"Add 5 to variable y\",\n", + "]\n", + "\n", + "for idx, query in enumerate(queries):\n", + " prompt_kwargs = {\"input_str\": query}\n", + " print(f\"\\n{idx} Query: {query}\")\n", + " print(f\"{'-'*50}\")\n", + " try:\n", + " result = generator(prompt_kwargs=prompt_kwargs)\n", + " # print(f\"LLM raw output: {result.raw_response}\")\n", + " func = Function.from_dict(result.data)\n", + " print(f\"Function: {func}\")\n", + " func_output = tool_manager.execute_func(func)\n", + " print(f\"Function output: {func_output}\")\n", + " except Exception as e:\n", + " print(\n", + " f\"Failed to 
execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "9DCukn1SUs_x", + "outputId": "dcfd952c-0699-4d79-ee6d-a59373e3c75d" + }, + "execution_count": 21, + "outputs": [ { - "cell_type": "code", - "source": [ - "queries = [\n", - " \"add 2 and 3\",\n", - " \"search for something\",\n", - " \"add points (1, 2) and (3, 4)\",\n", - " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", - " \"multiply 2 with local variable x\",\n", - " \"divide 2 by 3\",\n", - " \"Add 5 to variable y\",\n", - "]\n", - "\n", - "for idx, query in enumerate(queries):\n", - " prompt_kwargs = {\"input_str\": query}\n", - " print(f\"\\n{idx} Query: {query}\")\n", - " print(f\"{'-'*50}\")\n", - " try:\n", - " result = generator(prompt_kwargs=prompt_kwargs)\n", - " # print(f\"LLM raw output: {result.raw_response}\")\n", - " func = Function.from_dict(result.data)\n", - " print(f\"Function: {func}\")\n", - " func_output = tool_manager.execute_func(func)\n", - " print(f\"Function output: {func_output}\")\n", - " except Exception as e:\n", - " print(\n", - " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", - " )" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9DCukn1SUs_x", - "outputId": "dcfd952c-0699-4d79-ee6d-a59373e3c75d" - }, - "execution_count": 21, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "0 Query: add 2 and 3\n", - "--------------------------------------------------\n", - "Function: Function(thought=None, name='add', args=[], kwargs={'a': 2, 'b': 3})\n", - "Function output: FunctionOutput(name='add', input=Function(thought=None, name='add', args=(), kwargs={'a': 2, 'b': 3}), parsed_input=None, output=5, error=None)\n", - "\n", - "1 Query: search for something\n", - "--------------------------------------------------\n", - "Function: 
Function(thought=None, name='search', args=[], kwargs={'query': 'something'})\n", - "Function output: FunctionOutput(name='search', input=Function(thought=None, name='search', args=(), kwargs={'query': 'something'}), parsed_input=None, output=['result1something', 'result2something'], error=None)\n", - "\n", - "2 Query: add points (1, 2) and (3, 4)\n", - "--------------------------------------------------\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "ERROR:adalflow.core.func_tool:Error at calling : 'dict' object has no attribute 'x'\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Function: Function(thought=None, name='add_points', args=[], kwargs={'p1': {'x': 1, 'y': 2}, 'p2': {'x': 3, 'y': 4}})\n", - "Function output: FunctionOutput(name='add_points', input=Function(thought=None, name='add_points', args=(), kwargs={'p1': {'x': 1, 'y': 2}, 'p2': {'x': 3, 'y': 4}}), parsed_input=None, output=None, error=\"'dict' object has no attribute 'x'\")\n", - "\n", - "3 Query: sum numpy array with arr = np.array([[1, 2], [3, 4]])\n", - "--------------------------------------------------\n", - "Function: Function(thought=None, name='numpy_sum', args=[], kwargs={'arr': [[1, 2], [3, 4]]})\n", - "Function output: FunctionOutput(name='numpy_sum', input=Function(thought=None, name='numpy_sum', args=(), kwargs={'arr': [[1, 2], [3, 4]]}), parsed_input=None, output=10, error=None)\n", - "\n", - "4 Query: multiply 2 with local variable x\n", - "--------------------------------------------------\n", - "Function: Function(thought=None, name='multiply', args=[], kwargs={'a': 2, 'b': 'x'})\n", - "Function output: FunctionOutput(name='multiply', input=Function(thought=None, name='multiply', args=(), kwargs={'a': 2, 'b': 'x'}), parsed_input=None, output='xx', error=None)\n", - "\n", - "5 Query: divide 2 by 3\n", - "--------------------------------------------------\n", - "Function: Function(thought=None, name='divide', args=[], 
kwargs={'a': 2.0, 'b': 3.0})\n", - "Function output: FunctionOutput(name='divide', input=Function(thought=None, name='divide', args=(), kwargs={'a': 2.0, 'b': 3.0}), parsed_input=None, output=0.6666666666666666, error=None)\n", - "\n", - "6 Query: Add 5 to variable y\n", - "--------------------------------------------------\n", - "Function: Function(thought=None, name='add', args=[], kwargs={'a': 5, 'b': 'y'})\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "ERROR:adalflow.core.func_tool:Error at calling : unsupported operand type(s) for +: 'int' and 'str'\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Function output: FunctionOutput(name='add', input=Function(thought=None, name='add', args=(), kwargs={'a': 5, 'b': 'y'}), parsed_input=None, output=None, error=\"unsupported operand type(s) for +: 'int' and 'str'\")\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "0 Query: add 2 and 3\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='add', args=[], kwargs={'a': 2, 'b': 3})\n", + "Function output: FunctionOutput(name='add', input=Function(thought=None, name='add', args=(), kwargs={'a': 2, 'b': 3}), parsed_input=None, output=5, error=None)\n", + "\n", + "1 Query: search for something\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='search', args=[], kwargs={'query': 'something'})\n", + "Function output: FunctionOutput(name='search', input=Function(thought=None, name='search', args=(), kwargs={'query': 'something'}), parsed_input=None, output=['result1something', 'result2something'], error=None)\n", + "\n", + "2 Query: add points (1, 2) and (3, 4)\n", + "--------------------------------------------------\n" + ] }, { - "cell_type": "markdown", - "source": [ - "## FunctionExpression Output Format" - ], - "metadata": { - "id": "O-sBTPATUwsD" - } + "output_type": "stream", 
+ "name": "stderr", + "text": [ + "ERROR:adalflow.core.func_tool:Error at calling : 'dict' object has no attribute 'x'\n" + ] }, { - "cell_type": "code", - "source": [ - "tool_manager = ToolManager(\n", - " tools=functions,\n", - " additional_context={\"x\": x, \"y\": 0, \"np.array\": np.array, \"np\": np},\n", - ")\n", - "func_parser = JsonOutputParser(data_class=FunctionExpression)" - ], - "metadata": { - "id": "TVRZ44N1UyWg" - }, - "execution_count": 22, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Function: Function(thought=None, name='add_points', args=[], kwargs={'p1': {'x': 1, 'y': 2}, 'p2': {'x': 3, 'y': 4}})\n", + "Function output: FunctionOutput(name='add_points', input=Function(thought=None, name='add_points', args=(), kwargs={'p1': {'x': 1, 'y': 2}, 'p2': {'x': 3, 'y': 4}}), parsed_input=None, output=None, error=\"'dict' object has no attribute 'x'\")\n", + "\n", + "3 Query: sum numpy array with arr = np.array([[1, 2], [3, 4]])\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='numpy_sum', args=[], kwargs={'arr': [[1, 2], [3, 4]]})\n", + "Function output: FunctionOutput(name='numpy_sum', input=Function(thought=None, name='numpy_sum', args=(), kwargs={'arr': [[1, 2], [3, 4]]}), parsed_input=None, output=10, error=None)\n", + "\n", + "4 Query: multiply 2 with local variable x\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='multiply', args=[], kwargs={'a': 2, 'b': 'x'})\n", + "Function output: FunctionOutput(name='multiply', input=Function(thought=None, name='multiply', args=(), kwargs={'a': 2, 'b': 'x'}), parsed_input=None, output='xx', error=None)\n", + "\n", + "5 Query: divide 2 by 3\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='divide', args=[], kwargs={'a': 2.0, 'b': 3.0})\n", + "Function output: FunctionOutput(name='divide', input=Function(thought=None, 
name='divide', args=(), kwargs={'a': 2.0, 'b': 3.0}), parsed_input=None, output=0.6666666666666666, error=None)\n", + "\n", + "6 Query: Add 5 to variable y\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='add', args=[], kwargs={'a': 5, 'b': 'y'})\n" + ] }, { - "cell_type": "code", - "source": [ - "context = r\"\"\"\n", - "Your function expression also have access to these context:\n", - "{{context_str}}\n", - "\n", - "\"\"\"" - ], - "metadata": { - "id": "9h47p4XpU2BC" - }, - "execution_count": 23, - "outputs": [] + "output_type": "stream", + "name": "stderr", + "text": [ + "ERROR:adalflow.core.func_tool:Error at calling : unsupported operand type(s) for +: 'int' and 'str'\n" + ] }, { - "cell_type": "code", - "source": [ - "async def run_async_function_call(self, generator, tool_manager):\n", - " answers = []\n", - " start_time = time.time()\n", - " tasks = []\n", - " for idx, query in enumerate(queries):\n", - " tasks.append(self.process_query(idx, query, generator, tool_manager))\n", - "\n", - " results = await asyncio.gather(*tasks)\n", - " answers.extend(results)\n", - " end_time = time.time()\n", - " print(f\"Total time taken: {end_time - start_time :.2f} seconds\")\n", - " return answers\n", - "\n", - "async def process_query(self, idx, query, generator, tool_manager: ToolManager):\n", - " print(f\"\\n{idx} Query: {query}\")\n", - " print(f\"{'-'*50}\")\n", - " try:\n", - " result = generator(prompt_kwargs={\"input_str\": query})\n", - " func_expr = FunctionExpression.from_dict(result.data)\n", - " print(f\"Function_expr: {func_expr}\")\n", - " func = tool_manager.parse_func_expr(func_expr)\n", - " func_output = await tool_manager.execute_func_async(func)\n", - " print(f\"Function output: {func_output}\")\n", - " return func_output\n", - " except Exception as e:\n", - " print(\n", - " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", - " )\n", - " return None" - 
], - "metadata": { - "id": "n9Qq7wcOU4X9" - }, - "execution_count": 24, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Function output: FunctionOutput(name='add', input=Function(thought=None, name='add', args=(), kwargs={'a': 5, 'b': 'y'}), parsed_input=None, output=None, error=\"unsupported operand type(s) for +: 'int' and 'str'\")\n" + ] } - ] + ] + }, + { + "cell_type": "markdown", + "source": [ + "## FunctionExpression Output Format" + ], + "metadata": { + "id": "O-sBTPATUwsD" + } + }, + { + "cell_type": "code", + "source": [ + "tool_manager = ToolManager(\n", + " tools=functions,\n", + " additional_context={\"x\": x, \"y\": 0, \"np.array\": np.array, \"np\": np},\n", + ")\n", + "func_parser = JsonOutputParser(data_class=FunctionExpression)" + ], + "metadata": { + "id": "TVRZ44N1UyWg" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "context = r\"\"\"\n", + "Your function expression also have access to these context:\n", + "{{context_str}}\n", + "\n", + "\"\"\"" + ], + "metadata": { + "id": "9h47p4XpU2BC" + }, + "execution_count": 23, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "async def run_async_function_call(self, generator, tool_manager):\n", + " answers = []\n", + " start_time = time.time()\n", + " tasks = []\n", + " for idx, query in enumerate(queries):\n", + " tasks.append(self.process_query(idx, query, generator, tool_manager))\n", + "\n", + " results = await asyncio.gather(*tasks)\n", + " answers.extend(results)\n", + " end_time = time.time()\n", + " print(f\"Total time taken: {end_time - start_time :.2f} seconds\")\n", + " return answers\n", + "\n", + "\n", + "async def process_query(self, idx, query, generator, tool_manager: ToolManager):\n", + " print(f\"\\n{idx} Query: {query}\")\n", + " print(f\"{'-'*50}\")\n", + " try:\n", + " result = generator(prompt_kwargs={\"input_str\": query})\n", + " func_expr = FunctionExpression.from_dict(result.data)\n", + " 
print(f\"Function_expr: {func_expr}\")\n", + " func = tool_manager.parse_func_expr(func_expr)\n", + " func_output = await tool_manager.execute_func_async(func)\n", + " print(f\"Function output: {func_output}\")\n", + " return func_output\n", + " except Exception as e:\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", + " return None" + ], + "metadata": { + "id": "n9Qq7wcOU4X9" + }, + "execution_count": 24, + "outputs": [] + } + ] } diff --git a/notebooks/tutorials/adalflow_logger.ipynb b/notebooks/tutorials/adalflow_logger.ipynb index 135d6450..ae5a7d83 100644 --- a/notebooks/tutorials/adalflow_logger.ipynb +++ b/notebooks/tutorials/adalflow_logger.ipynb @@ -1,242 +1,246 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Adalflow RAG Playbook example\n", + "\n", + "There are different patterns to build a RAG:\n", + "\n", + "- RAG with separate data process pipeline and a RAG task pipeline. 
This fits into a scenario where there is lots of data in production database, and we preprocess the data to embeddings and then we build a RAG task pipeline that retrieves context in multiple stages.\n", + "\n", + "- RAG with dynamic data access and caching the embedding dynamically in a local storage.\n", + "\n", + "Here we will have have a look at an example with a local DB using FAISS" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "id": "-4c_AGBt3PlR", + "outputId": "275b050a-ce64-4b40-a5f9-4ccc12d92add" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "Please enter your GROQ API key: ··········\n", + "API keys have been set.\n" + ] } + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Adalflow RAG Playbook example\n", - "\n", - "There are different patterns to build a RAG:\n", - "\n", - "- RAG with separate data process pipeline and a RAG task pipeline. 
This fits into a scenario where there is lots of data in production database, and we preprocess the data to embeddings and then we build a RAG task pipeline that retrieves context in multiple stages.\n", - "\n", - "- RAG with dynamic data access and caching the embedding dynamically in a local storage.\n", - "\n", - "Here we will have have a look at an example with a local DB using FAISS" - ], - "metadata": { - "id": "lLGpv1fLLIjF" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "sfKEfaYC3Go7" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,faiss-cpu]\n", - "\n", - "clear_output()\n" - ] - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - "\n", - "print(\"API keys have been set.\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-4c_AGBt3PlR", - "outputId": "275b050a-ce64-4b40-a5f9-4ccc12d92add" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: ··········\n", - "Please enter your GROQ API key: ··········\n", - "API keys have been set.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Design\n", - "\n", - "Some libraries may use hooks [2] and callbacks [3] [4], or advanced web-based debugging tools [5] [6] [7]. Hooks and callbacks are conceptually similar in that they both allow users to execute custom code at specific points during the execution of a program. 
Both provide mechanisms to inject additional behavior in response to certain events or conditions, without modifying the core logic. PyTorch defines, registers, and executes hooks mainly in its base classes like nn.Module and Tensor, without polluting the functional and user-facing APIs.\n", - "\n", - "At this point, our objectives are:\n", - "\n", - "1. Maximize debugging capabilities via the simple logging module to keep the source code clean.\n", - "\n", - "2. Additionally, as we can’t always control the outputs of generators, we will provide customized logger and tracers(drop-in decorators) for them, for which we will explain in Tracing. This will not break the first objective.\n", - "\n", - "In the future, when we have more complex requirements from users, we will consider adding hooks/callbacks but we will do it in a way to keep the functional and user-facing APIs clean." - ], - "metadata": { - "id": "4NztjiLR_EQE" - } - }, - { - "cell_type": "code", - "source": [ - "import logging\n", - "\n", - "log = logging.getLogger(__name__)" - ], - "metadata": { - "id": "d2H1vYoC_F-g" - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from adalflow.utils.logger import get_logger\n", - "\n", - "\n", - "root_logger = get_logger()" - ], - "metadata": { - "id": "e2GxAapG_TJH" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from adalflow.utils.logger import printc\n", - "\n", - "printc(\"All logging examples are done. Feeling green!\", color=\"green\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Yk4oiBFE_asG", - "outputId": "470e30dc-1b31-40c1-9e48-30754ae54b45" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[32m2024-11-28 13:39:41 - [:3:] - All logging examples are done. 
Feeling green!\u001b[0m\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Set up all logs in one file\n", - "\n", - "Assume your source code is at src/task.py. You can log simply by:" - ], - "metadata": { - "id": "B8lmlT_9_nVP" - } - }, - { - "cell_type": "code", - "source": [ - "import logging\n", - "\n", - "log = logging.getLogger(__name__)\n", - "\n", - "class Task:\n", - " def __init__(self):\n", - " log.info(\"This is a user program child logger\")" - ], - "metadata": { - "id": "o_Ru1myM_c-J" - }, - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import logging\n", - "from adalflow.utils.logger import get_logger\n", - "\n", - "root_logger = get_logger(level=\"DEBUG\", save_dir=\"./logs\") # log to ./logs/lib.log\n", - "\n", - "# run code from the library components such as generator\n", - "# ....\n", - "\n", - "root_logger.info(\"This is the log in the main file\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "o7YPjEZk_ehg", - "outputId": "ad0f58e9-6f5c-4d00-e737-2fa1ad5ebd85" - }, - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "2024-11-28 13:39:46 - - INFO - [:9:] - This is the log in the main file\n" - ] - } - ] + { + "cell_type": "markdown", + "source": [ + "## Design\n", + "\n", + "Some libraries may use hooks [2] and callbacks [3] [4], or advanced web-based debugging tools [5] [6] [7]. Hooks and callbacks are conceptually similar in that they both allow users to execute custom code at specific points during the execution of a program. Both provide mechanisms to inject additional behavior in response to certain events or conditions, without modifying the core logic. PyTorch defines, registers, and executes hooks mainly in its base classes like nn.Module and Tensor, without polluting the functional and user-facing APIs.\n", + "\n", + "At this point, our objectives are:\n", + "\n", + "1. 
Maximize debugging capabilities via the simple logging module to keep the source code clean.\n", + "\n", + "2. Additionally, as we can’t always control the outputs of generators, we will provide customized logger and tracers(drop-in decorators) for them, for which we will explain in Tracing. This will not break the first objective.\n", + "\n", + "In the future, when we have more complex requirements from users, we will consider adding hooks/callbacks but we will do it in a way to keep the functional and user-facing APIs clean." + ], + "metadata": { + "id": "4NztjiLR_EQE" + } + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "\n", + "log = logging.getLogger(__name__)" + ], + "metadata": { + "id": "d2H1vYoC_F-g" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.utils.logger import get_logger\n", + "\n", + "\n", + "root_logger = get_logger()" + ], + "metadata": { + "id": "e2GxAapG_TJH" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.utils.logger import printc\n", + "\n", + "printc(\"All logging examples are done. Feeling green!\", color=\"green\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Yk4oiBFE_asG", + "outputId": "470e30dc-1b31-40c1-9e48-30754ae54b45" + }, + "execution_count": 5, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "Separate library and application logs" - ], - "metadata": { - "id": "Db1_Ob3X_gpe" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2024-11-28 13:39:41 - [:3:] - All logging examples are done. Feeling green!\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Set up all logs in one file\n", + "\n", + "Assume your source code is at src/task.py. 
You can log simply by:" + ], + "metadata": { + "id": "B8lmlT_9_nVP" + } + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "\n", + "class Task:\n", + " def __init__(self):\n", + " log.info(\"This is a user program child logger\")" + ], + "metadata": { + "id": "o_Ru1myM_c-J" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "from adalflow.utils.logger import get_logger\n", + "\n", + "root_logger = get_logger(level=\"DEBUG\", save_dir=\"./logs\") # log to ./logs/lib.log\n", + "\n", + "# run code from the library components such as generator\n", + "# ....\n", + "\n", + "root_logger.info(\"This is the log in the main file\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "o7YPjEZk_ehg", + "outputId": "ad0f58e9-6f5c-4d00-e737-2fa1ad5ebd85" + }, + "execution_count": 7, + "outputs": [ { - "cell_type": "code", - "source": [ - "from adalflow.utils.logger import get_logger\n", - "\n", - "app_logger = get_logger(name=\"my_app\", level=\"DEBUG\", save_dir=\"./logs\") # log to ./logs/my_app.log\n", - "\n", - "class Task:\n", - " def __init__(self):\n", - " app_logger.info(\"This is a user program child logger\")" - ], - "metadata": { - "id": "rQWuFnUc_gNm" - }, - "execution_count": 8, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-11-28 13:39:46 - - INFO - [:9:] - This is the log in the main file\n" + ] } - ] + ] + }, + { + "cell_type": "markdown", + "source": [ + "Separate library and application logs" + ], + "metadata": { + "id": "Db1_Ob3X_gpe" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.utils.logger import get_logger\n", + "\n", + "app_logger = get_logger(\n", + " name=\"my_app\", level=\"DEBUG\", save_dir=\"./logs\"\n", + ") # log to ./logs/my_app.log\n", + "\n", + "\n", + "class Task:\n", + " def __init__(self):\n", + " 
app_logger.info(\"This is a user program child logger\")" + ], + "metadata": { + "id": "rQWuFnUc_gNm" + }, + "execution_count": 8, + "outputs": [] + } + ] } diff --git a/notebooks/tutorials/adalflow_rag_optimization.ipynb b/notebooks/tutorials/adalflow_rag_optimization.ipynb index 7ae0b152..34d208bf 100644 --- a/notebooks/tutorials/adalflow_rag_optimization.ipynb +++ b/notebooks/tutorials/adalflow_rag_optimization.ipynb @@ -1,495 +1,498 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 🤗 Welcome to AdalFlow!\n", + "## The PyTorch library to auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! 
โญ Star us on Github โญ\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "## ๐Ÿ“– Outline\n", + "\n", + "In this tutorial, we will cover the auto-optimization of a standard RAG:\n", + "\n", + "- Introducing HotPotQA dataset and HotPotQAData class.\n", + "\n", + "- Convert Dspyโ€™s Retriever to AdalFlowโ€™s Retriever to easy comparison.\n", + "\n", + "- Build the standard RAG with Retriever and Generator components.\n", + "\n", + "- Learn how to connect the output-input between components to enable auto-text-grad optimization." + ], + "metadata": { + "id": "xHF95Kr4CzGq" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. Setup `openai` and `groq` API key in the environment variables\n", + "\n", + "You can choose to use different client. You can import the model client you prefer. We support `Anthropic`, `Cohere`, `Google`, `GROQ`, `OpenAI`, `Transformer` and more in development. 
We will use OpenAI here as an example.Please refer to our [full installation guide](https://adalflow.sylph.ai/get_started/installation.html)" + ], + "metadata": { + "id": "Kof5M6DRaKhh" + } + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "tAp3eDjOCma1" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai] # also install the package for the model client you'll use\n", + "!pip install dspy\n", + "!pip install datasets\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Set Environment Variables\n", + "\n", + "Run the following code and pass your api key.\n", + "\n", + "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n", + "\n", + "*Go to [OpenAI](https://platform.openai.com/docs/introduction) to get API keys if you don't already have.*" + ], + "metadata": { + "id": "KapUyHMM07pJ" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" + "id": "ONfzF9Puzdd_", + "outputId": "5fc0cd30-9ae7-443a-c06c-31e9edeafd69" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", + "API keys have been set.\n" + ] } + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# ๐Ÿค— Welcome to AdalFlow!\n", - 
"## The PyTorch library to auto-optimize any LLM task pipelines\n", - "\n", - "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of ๐Ÿ˜Š any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! โญ Star us on Github โญ\n", - "\n", - "\n", - "# Quick Links\n", - "\n", - "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", - "\n", - "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", - "\n", - "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", - "\n", - "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", - "\n", - "## ๐Ÿ“– Outline\n", - "\n", - "In this tutorial, we will cover the auto-optimization of a standard RAG:\n", - "\n", - "- Introducing HotPotQA dataset and HotPotQAData class.\n", - "\n", - "- Convert Dspyโ€™s Retriever to AdalFlowโ€™s Retriever to easy comparison.\n", - "\n", - "- Build the standard RAG with Retriever and Generator components.\n", - "\n", - "- Learn how to connect the output-input between components to enable auto-text-grad optimization." - ], - "metadata": { - "id": "xHF95Kr4CzGq" - } - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "# Installation\n", - "\n", - "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq` from the extra packages.\n", - "\n", - " ```bash\n", - " pip install adalflow[openai,groq]\n", - " ```\n", - "2. Setup `openai` and `groq` API key in the environment variables\n", - "\n", - "You can choose to use different client. You can import the model client you prefer. We support `Anthropic`, `Cohere`, `Google`, `GROQ`, `OpenAI`, `Transformer` and more in development. 
We will use OpenAI here as an example.Please refer to our [full installation guide](https://adalflow.sylph.ai/get_started/installation.html)" - ], - "metadata": { - "id": "Kof5M6DRaKhh" - } - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "tAp3eDjOCma1" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai] # also install the package for the model client you'll use\n", - "!pip install dspy\n", - "!pip install datasets\n", - "clear_output()" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Set Environment Variables\n", - "\n", - "Run the following code and pass your api key.\n", - "\n", - "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n", - "\n", - "*Go to [OpenAI](https://platform.openai.com/docs/introduction) to get API keys if you don't already have.*" - ], - "metadata": { - "id": "KapUyHMM07pJ" - } - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "\n", - "print(\"API keys have been set.\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ONfzF9Puzdd_", - "outputId": "5fc0cd30-9ae7-443a-c06c-31e9edeafd69" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", - "API keys have been set.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "import dspy\n", - "import re\n", - "from typing import List, Union, Optional, Dict, Callable, Any, Tuple\n", - "from dataclasses import dataclass, field\n", - "import adalflow 
as adal\n", - "from adalflow.optim.parameter import Parameter, ParameterType\n", - "from adalflow.datasets.hotpot_qa import HotPotQA, HotPotQAData\n", - "from adalflow.datasets.types import Example\n", - "from adalflow.core.types import RetrieverOutput\n", - "from adalflow.core import Component, Generator\n", - "from adalflow.core.retriever import Retriever\n", - "from adalflow.core.component import fun_to_component\n", - "from adalflow.components.model_client.openai_client import OpenAIClient" - ], - "metadata": { - "id": "aE3I05BqOmd7" - }, - "execution_count": 20, - "outputs": [] + { + "cell_type": "code", + "source": [ + "import dspy\n", + "import re\n", + "from typing import List, Union, Optional, Dict, Callable, Any, Tuple\n", + "from dataclasses import dataclass, field\n", + "import adalflow as adal\n", + "from adalflow.optim.parameter import Parameter, ParameterType\n", + "from adalflow.datasets.hotpot_qa import HotPotQA, HotPotQAData\n", + "from adalflow.datasets.types import Example\n", + "from adalflow.core.types import RetrieverOutput\n", + "from adalflow.core import Component, Generator\n", + "from adalflow.core.retriever import Retriever\n", + "from adalflow.core.component import fun_to_component\n", + "from adalflow.components.model_client.openai_client import OpenAIClient" + ], + "metadata": { + "id": "aE3I05BqOmd7" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-4o-mini\",\n", + " \"max_tokens\": 2000,\n", + " },\n", + "}\n", + "\n", + "gpt_3_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"max_tokens\": 2000,\n", + " },\n", + "}" + ], + "metadata": { + "id": "cqUUoua9fUxQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def load_datasets():\n", + "\n", + " trainset 
= HotPotQA(split=\"train\", size=20)\n", + " valset = HotPotQA(split=\"val\", size=50)\n", + " testset = HotPotQA(split=\"test\", size=50)\n", + " print(f\"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}\")\n", + " return trainset, valset, testset\n", + "\n", + "\n", + "@dataclass\n", + "class AnswerData(adal.DataClass):\n", + " reasoning: str = field(\n", + " metadata={\"desc\": \"The reasoning to produce the answer\"},\n", + " )\n", + " answer: str = field(\n", + " metadata={\"desc\": \"The answer you produced\"},\n", + " )\n", + "\n", + " __output_fields__ = [\"reasoning\", \"answer\"]\n", + "\n", + "\n", + "dataset = HotPotQA(split=\"train\", size=20)\n", + "print(dataset[0], type(dataset[0]))\n", + "\n", + "HotPotQAData(\n", + " id=\"5a8b57f25542995d1e6f1371\",\n", + " question=\"Were Scott Derrickson and Ed Wood of the same nationality?\",\n", + " answer=\"yes\",\n", + " gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\",\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "0irHeHUkOmL8", + "outputId": "61f778a2-9ec1-4fda-daa2-bcc7f31baa78" + }, + "execution_count": 22, + "outputs": [ { - "cell_type": "code", - "source": [ - "\n", - "gpt_4o_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-4o-mini\",\n", - " \"max_tokens\": 2000,\n", - " },\n", - "}\n", - "\n", - "gpt_3_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"max_tokens\": 2000,\n", - " },\n", - "}" - ], - "metadata": { - "id": "cqUUoua9fUxQ" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\") \n" + ] }, { - "cell_type": "code", - "source": [ - "def load_datasets():\n", - 
"\n", - " trainset = HotPotQA(split=\"train\", size=20)\n", - " valset = HotPotQA(split=\"val\", size=50)\n", - " testset = HotPotQA(split=\"test\", size=50)\n", - " print(f\"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}\")\n", - " return trainset, valset, testset\n", - "\n", - "\n", - "@dataclass\n", - "class AnswerData(adal.DataClass):\n", - " reasoning: str = field(\n", - " metadata={\"desc\": \"The reasoning to produce the answer\"},\n", - " )\n", - " answer: str = field(\n", - " metadata={\"desc\": \"The answer you produced\"},\n", - " )\n", - "\n", - " __output_fields__ = [\"reasoning\", \"answer\"]\n", - "\n", - "\n", - "dataset = HotPotQA(split=\"train\", size=20)\n", - "print(dataset[0], type(dataset[0]))\n", - "\n", - "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0irHeHUkOmL8", - "outputId": "61f778a2-9ec1-4fda-daa2-bcc7f31baa78" - }, - "execution_count": 22, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\") \n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\")" - ] - }, - "metadata": {}, - "execution_count": 22 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\")" ] - }, - { - "cell_type": "code", - 
"source": [ - "class DspyRetriever(adal.Retriever):\n", - " def __init__(self, top_k: int = 3):\n", - " super().__init__()\n", - " self.top_k = top_k\n", - " self.dspy_retriever = dspy.Retrieve(k=top_k)\n", - "\n", - " def call(self, input: str, top_k: Optional[int] = None) -> List[adal.RetrieverOutput]:\n", - "\n", - " k = top_k or self.top_k\n", - "\n", - " output = self.dspy_retriever(query_or_queries=input, k=k)\n", - " final_output: List[RetrieverOutput] = []\n", - " documents = output.passages\n", - "\n", - " final_output.append(\n", - " RetrieverOutput(\n", - " query=input,\n", - " documents=documents,\n", - " doc_indices=[],\n", - " )\n", - " )\n", - " return final_output\n", - "\n", - "def test_retriever():\n", - " question = \"How many storeys are in the castle that David Gregory inherited?\"\n", - " retriever = DspyRetriever(top_k=3)\n", - " retriever_out = retriever(input=question)\n", - " print(f\"retriever_out: {retriever_out}\")\n", - "\n", - "\n", - "def call(\n", - " self, question: str, id: Optional[str] = None\n", - " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", - " prompt_kwargs = self._prepare_input(question)\n", - " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", - " return output\n", - "\n", - "\n", - "def call(self, question: str, id: str = None) -> adal.GeneratorOutput:\n", - " if self.training:\n", - " raise ValueError(\n", - " \"This component is not supposed to be called in training mode\"\n", - " )\n", - "\n", - " retriever_out = self.retriever.call(input=question)\n", - "\n", - " successor_map_fn = lambda x: ( # noqa E731\n", - " \"\\n\\n\".join(x[0].documents) if x and x[0] and x[0].documents else \"\"\n", - " )\n", - " retrieved_context = successor_map_fn(retriever_out)\n", - "\n", - " prompt_kwargs = {\n", - " \"context\": retrieved_context,\n", - " \"question\": question,\n", - " }\n", - "\n", - " output = self.llm.call(\n", - " prompt_kwargs=prompt_kwargs,\n", - " id=id,\n", - " )\n", - " return output\n", - 
"\n", - "\n", - "def forward(self, question: str, id: str = None) -> adal.Parameter:\n", - " if not self.training:\n", - " raise ValueError(\"This component is not supposed to be called in eval mode\")\n", - " retriever_out = self.retriever.forward(input=question)\n", - " successor_map_fn = lambda x: ( # noqa E731\n", - " \"\\n\\n\".join(x.data[0].documents)\n", - " if x.data and x.data[0] and x.data[0].documents\n", - " else \"\"\n", - " )\n", - " retriever_out.add_successor_map_fn(successor=self.llm, map_fn=successor_map_fn)\n", - " generator_out = self.llm.forward(\n", - " prompt_kwargs={\"question\": question, \"context\": retriever_out}, id=id\n", - " )\n", - " return generator_out\n", - "\n", - "\n", - "def bicall(\n", - " self, question: str, id: str = None\n", - ") -> Union[adal.GeneratorOutput, adal.Parameter]:\n", - " \"\"\"You can also combine both the forward and call in the same function.\n", - " Supports both training and eval mode by using __call__ for GradComponents\n", - " like Retriever and Generator\n", - " \"\"\"\n", - " retriever_out = self.retriever(input=question)\n", - " if isinstance(retriever_out, adal.Parameter):\n", - " successor_map_fn = lambda x: ( # noqa E731\n", - " \"\\n\\n\".join(x.data[0].documents)\n", - " if x.data and x.data[0] and x.data[0].documents\n", - " else \"\"\n", - " )\n", - " retriever_out.add_successor_map_fn(\n", - " successor=self.llm, map_fn=successor_map_fn\n", - " )\n", - " else:\n", - " successor_map_fn = lambda x: ( # noqa E731\n", - " \"\\n\\n\".join(x[0].documents) if x and x[0] and x[0].documents else \"\"\n", - " )\n", - " retrieved_context = successor_map_fn(retriever_out)\n", - " prompt_kwargs = {\n", - " \"context\": retrieved_context,\n", - " \"question\": question,\n", - " }\n", - " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", - " return output\n", - "\n", - "task_desc_str = r\"\"\"Answer questions with short factoid answers.\n", - "\n", - "You will receive context(may contain relevant 
facts) and a question.\n", - "Think step by step.\"\"\"\n", - "\n", - "\n", - "class VanillaRAG(adal.GradComponent):\n", - " def __init__(self, passages_per_hop=3, model_client=None, model_kwargs=None):\n", - " super().__init__()\n", - "\n", - " self.passages_per_hop = passages_per_hop\n", - "\n", - " self.retriever = DspyRetriever(top_k=passages_per_hop)\n", - " self.llm_parser = adal.DataClassParser(\n", - " data_class=AnswerData, return_data_class=True, format_type=\"json\"\n", - " )\n", - " self.llm = Generator(\n", - " model_client=model_client,\n", - " model_kwargs=model_kwargs,\n", - " prompt_kwargs={\n", - " \"task_desc_str\": adal.Parameter(\n", - " data=task_desc_str,\n", - " role_desc=\"Task description for the language model\",\n", - " param_type=adal.ParameterType.PROMPT,\n", - " ),\n", - " \"few_shot_demos\": adal.Parameter(\n", - " data=None,\n", - " requires_opt=True,\n", - " role_desc=\"To provide few shot demos to the language model\",\n", - " param_type=adal.ParameterType.DEMOS,\n", - " ),\n", - " \"output_format_str\": self.llm_parser.get_output_format_str(),\n", - " },\n", - " template=answer_template,\n", - " output_processors=self.llm_parser,\n", - " use_cache=True,\n", - " )\n", - "\n", - "\n", - "class VallinaRAGAdal(adal.AdalComponent):\n", - " def __init__(\n", - " self,\n", - " model_client: adal.ModelClient,\n", - " model_kwargs: Dict,\n", - " backward_engine_model_config: Dict | None = None,\n", - " teacher_model_config: Dict | None = None,\n", - " text_optimizer_model_config: Dict | None = None,\n", - " ):\n", - " task = VanillaRAG(\n", - " model_client=model_client,\n", - " model_kwargs=model_kwargs,\n", - " passages_per_hop=3,\n", - " )\n", - " eval_fn = AnswerMatchAcc(type=\"fuzzy_match\").compute_single_item\n", - " loss_fn = adal.EvalFnToTextLoss(\n", - " eval_fn=eval_fn, eval_fn_desc=\"fuzzy_match: 1 if str(y) in str(y_gt) else 0\"\n", - " )\n", - " super().__init__(\n", - " task=task,\n", - " eval_fn=eval_fn,\n", - " 
loss_fn=loss_fn,\n", - " backward_engine_model_config=backward_engine_model_config,\n", - " teacher_model_config=teacher_model_config,\n", - " text_optimizer_model_config=text_optimizer_model_config,\n", - " )\n", - "\n", - " # tell the trainer how to call the task\n", - " def prepare_task(self, sample: HotPotQAData) -> Tuple[Callable[..., Any], Dict]:\n", - " if self.task.training:\n", - " return self.task.forward, {\"question\": sample.question, \"id\": sample.id}\n", - " else:\n", - " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", - "\n", - "\n", - " # eval mode: get the generator output, directly engage with the eval_fn\n", - " def prepare_eval(self, sample: HotPotQAData, y_pred: adal.GeneratorOutput) -> float:\n", - " y_label = \"\"\n", - " if y_pred and y_pred.data and y_pred.data.answer:\n", - " y_label = y_pred.data.answer\n", - " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n", - "\n", - "\n", - " # train mode: get the loss and get the data from the full_response\n", - " def prepare_loss(self, sample: HotPotQAData, pred: adal.Parameter):\n", - " # prepare gt parameter\n", - " y_gt = adal.Parameter(\n", - " name=\"y_gt\",\n", - " data=sample.answer,\n", - " eval_input=sample.answer,\n", - " requires_opt=False,\n", - " )\n", - "\n", - " # pred's full_response is the output of the task pipeline which is GeneratorOutput\n", - " pred.eval_input = (\n", - " pred.full_response.data.answer\n", - " if pred.full_response\n", - " and pred.full_response.data\n", - " and pred.full_response.data.answer\n", - " else \"\"\n", - " )\n", - " return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}\n", - "\n", - "def train_diagnose(\n", - " model_client: adal.ModelClient,\n", - " model_kwargs: Dict,\n", - ") -> Dict:\n", - "\n", - " trainset, valset, testset = load_datasets()\n", - "\n", - " adal_component = VallinaRAGAdal(\n", - " model_client,\n", - " model_kwargs,\n", - " 
backward_engine_model_config=gpt_4o_model,\n", - " teacher_model_config=gpt_3_model,\n", - " text_optimizer_model_config=gpt_3_model,\n", - " )\n", - " trainer = adal.Trainer(adaltask=adal_component)\n", - " trainer.diagnose(dataset=trainset, split=\"train\")\n", - " # trainer.diagnose(dataset=valset, split=\"val\")\n", - " # trainer.diagnose(dataset=testset, split=\"test\")\n" - ], - "metadata": { - "id": "ZZIEtZYHNVjo" - }, - "execution_count": 23, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Issues and feedback\n", - "\n", - "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", - "\n", - "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." - ], - "metadata": { - "id": "AmkbyxmuruUu" - } + }, + "metadata": {}, + "execution_count": 22 } - ] + ] + }, + { + "cell_type": "code", + "source": [ + "class DspyRetriever(adal.Retriever):\n", + " def __init__(self, top_k: int = 3):\n", + " super().__init__()\n", + " self.top_k = top_k\n", + " self.dspy_retriever = dspy.Retrieve(k=top_k)\n", + "\n", + " def call(\n", + " self, input: str, top_k: Optional[int] = None\n", + " ) -> List[adal.RetrieverOutput]:\n", + "\n", + " k = top_k or self.top_k\n", + "\n", + " output = self.dspy_retriever(query_or_queries=input, k=k)\n", + " final_output: List[RetrieverOutput] = []\n", + " documents = output.passages\n", + "\n", + " final_output.append(\n", + " RetrieverOutput(\n", + " query=input,\n", + " documents=documents,\n", + " doc_indices=[],\n", + " )\n", + " )\n", + " return final_output\n", + "\n", + "\n", + "def test_retriever():\n", + " question = \"How many storeys are in the castle that David Gregory inherited?\"\n", + " retriever = DspyRetriever(top_k=3)\n", + " retriever_out = retriever(input=question)\n", + " print(f\"retriever_out: {retriever_out}\")\n", + "\n", + "\n", + 
"def call(\n", + " self, question: str, id: Optional[str] = None\n", + ") -> Union[adal.GeneratorOutput, adal.Parameter]:\n", + " prompt_kwargs = self._prepare_input(question)\n", + " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", + " return output\n", + "\n", + "\n", + "def call(self, question: str, id: str = None) -> adal.GeneratorOutput:\n", + " if self.training:\n", + " raise ValueError(\"This component is not supposed to be called in training mode\")\n", + "\n", + " retriever_out = self.retriever.call(input=question)\n", + "\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x[0].documents) if x and x[0] and x[0].documents else \"\"\n", + " )\n", + " retrieved_context = successor_map_fn(retriever_out)\n", + "\n", + " prompt_kwargs = {\n", + " \"context\": retrieved_context,\n", + " \"question\": question,\n", + " }\n", + "\n", + " output = self.llm.call(\n", + " prompt_kwargs=prompt_kwargs,\n", + " id=id,\n", + " )\n", + " return output\n", + "\n", + "\n", + "def forward(self, question: str, id: str = None) -> adal.Parameter:\n", + " if not self.training:\n", + " raise ValueError(\"This component is not supposed to be called in eval mode\")\n", + " retriever_out = self.retriever.forward(input=question)\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x.data[0].documents)\n", + " if x.data and x.data[0] and x.data[0].documents\n", + " else \"\"\n", + " )\n", + " retriever_out.add_successor_map_fn(successor=self.llm, map_fn=successor_map_fn)\n", + " generator_out = self.llm.forward(\n", + " prompt_kwargs={\"question\": question, \"context\": retriever_out}, id=id\n", + " )\n", + " return generator_out\n", + "\n", + "\n", + "def bicall(\n", + " self, question: str, id: str = None\n", + ") -> Union[adal.GeneratorOutput, adal.Parameter]:\n", + " \"\"\"You can also combine both the forward and call in the same function.\n", + " Supports both training and eval mode by using __call__ for GradComponents\n", + 
" like Retriever and Generator\n", + " \"\"\"\n", + " retriever_out = self.retriever(input=question)\n", + " if isinstance(retriever_out, adal.Parameter):\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x.data[0].documents)\n", + " if x.data and x.data[0] and x.data[0].documents\n", + " else \"\"\n", + " )\n", + " retriever_out.add_successor_map_fn(successor=self.llm, map_fn=successor_map_fn)\n", + " else:\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x[0].documents) if x and x[0] and x[0].documents else \"\"\n", + " )\n", + " retrieved_context = successor_map_fn(retriever_out)\n", + " prompt_kwargs = {\n", + " \"context\": retrieved_context,\n", + " \"question\": question,\n", + " }\n", + " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", + " return output\n", + "\n", + "\n", + "task_desc_str = r\"\"\"Answer questions with short factoid answers.\n", + "\n", + "You will receive context(may contain relevant facts) and a question.\n", + "Think step by step.\"\"\"\n", + "\n", + "\n", + "class VanillaRAG(adal.GradComponent):\n", + " def __init__(self, passages_per_hop=3, model_client=None, model_kwargs=None):\n", + " super().__init__()\n", + "\n", + " self.passages_per_hop = passages_per_hop\n", + "\n", + " self.retriever = DspyRetriever(top_k=passages_per_hop)\n", + " self.llm_parser = adal.DataClassParser(\n", + " data_class=AnswerData, return_data_class=True, format_type=\"json\"\n", + " )\n", + " self.llm = Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " prompt_kwargs={\n", + " \"task_desc_str\": adal.Parameter(\n", + " data=task_desc_str,\n", + " role_desc=\"Task description for the language model\",\n", + " param_type=adal.ParameterType.PROMPT,\n", + " ),\n", + " \"few_shot_demos\": adal.Parameter(\n", + " data=None,\n", + " requires_opt=True,\n", + " role_desc=\"To provide few shot demos to the language model\",\n", + " 
param_type=adal.ParameterType.DEMOS,\n", + " ),\n", + " \"output_format_str\": self.llm_parser.get_output_format_str(),\n", + " },\n", + " template=answer_template,\n", + " output_processors=self.llm_parser,\n", + " use_cache=True,\n", + " )\n", + "\n", + "\n", + "class VallinaRAGAdal(adal.AdalComponent):\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " backward_engine_model_config: Dict | None = None,\n", + " teacher_model_config: Dict | None = None,\n", + " text_optimizer_model_config: Dict | None = None,\n", + " ):\n", + " task = VanillaRAG(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " passages_per_hop=3,\n", + " )\n", + " eval_fn = AnswerMatchAcc(type=\"fuzzy_match\").compute_single_item\n", + " loss_fn = adal.EvalFnToTextLoss(\n", + " eval_fn=eval_fn, eval_fn_desc=\"fuzzy_match: 1 if str(y) in str(y_gt) else 0\"\n", + " )\n", + " super().__init__(\n", + " task=task,\n", + " eval_fn=eval_fn,\n", + " loss_fn=loss_fn,\n", + " backward_engine_model_config=backward_engine_model_config,\n", + " teacher_model_config=teacher_model_config,\n", + " text_optimizer_model_config=text_optimizer_model_config,\n", + " )\n", + "\n", + " # tell the trainer how to call the task\n", + " def prepare_task(self, sample: HotPotQAData) -> Tuple[Callable[..., Any], Dict]:\n", + " if self.task.training:\n", + " return self.task.forward, {\"question\": sample.question, \"id\": sample.id}\n", + " else:\n", + " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", + "\n", + " # eval mode: get the generator output, directly engage with the eval_fn\n", + " def prepare_eval(self, sample: HotPotQAData, y_pred: adal.GeneratorOutput) -> float:\n", + " y_label = \"\"\n", + " if y_pred and y_pred.data and y_pred.data.answer:\n", + " y_label = y_pred.data.answer\n", + " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n", + "\n", + " # train mode: get the loss and 
get the data from the full_response\n", + " def prepare_loss(self, sample: HotPotQAData, pred: adal.Parameter):\n", + " # prepare gt parameter\n", + " y_gt = adal.Parameter(\n", + " name=\"y_gt\",\n", + " data=sample.answer,\n", + " eval_input=sample.answer,\n", + " requires_opt=False,\n", + " )\n", + "\n", + " # pred's full_response is the output of the task pipeline which is GeneratorOutput\n", + " pred.eval_input = (\n", + " pred.full_response.data.answer\n", + " if pred.full_response\n", + " and pred.full_response.data\n", + " and pred.full_response.data.answer\n", + " else \"\"\n", + " )\n", + " return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}\n", + "\n", + "\n", + "def train_diagnose(\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + ") -> Dict:\n", + "\n", + " trainset, valset, testset = load_datasets()\n", + "\n", + " adal_component = VallinaRAGAdal(\n", + " model_client,\n", + " model_kwargs,\n", + " backward_engine_model_config=gpt_4o_model,\n", + " teacher_model_config=gpt_3_model,\n", + " text_optimizer_model_config=gpt_3_model,\n", + " )\n", + " trainer = adal.Trainer(adaltask=adal_component)\n", + " trainer.diagnose(dataset=trainset, split=\"train\")\n", + " # trainer.diagnose(dataset=valset, split=\"val\")\n", + " # trainer.diagnose(dataset=testset, split=\"test\")" + ], + "metadata": { + "id": "ZZIEtZYHNVjo" + }, + "execution_count": 23, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
+ ], + "metadata": { + "id": "AmkbyxmuruUu" + } + } + ] } diff --git a/notebooks/tutorials/adalflow_rag_playbook.ipynb b/notebooks/tutorials/adalflow_rag_playbook.ipynb index 27c6bda0..308ade6e 100644 --- a/notebooks/tutorials/adalflow_rag_playbook.ipynb +++ b/notebooks/tutorials/adalflow_rag_playbook.ipynb @@ -1,522 +1,526 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Adalflow RAG Playbook example\n", + "\n", + "There are different patterns to build a RAG:\n", + "\n", + "- RAG with separate data process pipeline and a RAG task pipeline. This fits into a scenario where there is lots of data in production database, and we preprocess the data to embeddings and then we build a RAG task pipeline that retrieves context in multiple stages.\n", + "\n", + "- RAG with dynamic data access and caching the embedding dynamically in a local storage.\n", + "\n", + "Here we will have have a look at an example with a local DB using FAISS" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", 
+ "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" + "id": "-4c_AGBt3PlR", + "outputId": "a36f157b-0b18-4f3d-d5a8-09aa94743922" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", + "Please enter your GROQ API key: ยทยทยทยทยทยทยทยทยทยท\n", + "API keys have been set.\n" + ] } + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Adalflow RAG Playbook example\n", - "\n", - "There are different patterns to build a RAG:\n", - "\n", - "- RAG with separate data process pipeline and a RAG task pipeline. This fits into a scenario where there is lots of data in production database, and we preprocess the data to embeddings and then we build a RAG task pipeline that retrieves context in multiple stages.\n", - "\n", - "- RAG with dynamic data access and caching the embedding dynamically in a local storage.\n", - "\n", - "Here we will have have a look at an example with a local DB using FAISS" - ], - "metadata": { - "id": "lLGpv1fLLIjF" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "sfKEfaYC3Go7" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,faiss-cpu]\n", - "\n", - "clear_output()\n" - ] - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - 
"\n", - "print(\"API keys have been set.\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-4c_AGBt3PlR", - "outputId": "a36f157b-0b18-4f3d-d5a8-09aa94743922" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", - "Please enter your GROQ API key: ยทยทยทยทยทยทยทยทยทยท\n", - "API keys have been set.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from typing import Any, List, Optional\n", - "import os\n", - "from adalflow.core import Component, Generator, Embedder, Sequential\n", - "from adalflow.core.types import Document, ModelClientType\n", - "from adalflow.core.string_parser import JsonParser\n", - "from adalflow.core.db import LocalDB\n", - "from adalflow.utils import setup_env\n", - "from adalflow.components.retriever.faiss_retriever import FAISSRetriever\n", - "from adalflow.components.data_process import (\n", - " RetrieverOutputToContextStr,\n", - " ToEmbeddings,\n", - " TextSplitter,\n", - ")\n", - "from adalflow.utils.global_config import get_adalflow_default_root_path\n" - ], - "metadata": { - "id": "V9LsGDnm3RbV" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "configs = {\n", - " \"embedder\": {\n", - " \"batch_size\": 100,\n", - " \"model_kwargs\": {\n", - " \"model\": \"text-embedding-3-small\",\n", - " \"dimensions\": 256,\n", - " \"encoding_format\": \"float\",\n", - " },\n", - " },\n", - " \"retriever\": {\n", - " \"top_k\": 5,\n", - " },\n", - " \"generator\": {\n", - " \"model_client\": ModelClientType.OPENAI(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"temperature\": 0.3,\n", - " \"stream\": False,\n", - " },\n", - " },\n", - " \"text_splitter\": {\n", - " \"split_by\": \"word\",\n", - " \"chunk_size\": 400,\n", - " \"chunk_overlap\": 200,\n", - " },\n", - "}\n" - ], - "metadata": { - "id": 
"kWGTZxrw3Tli" - }, - "execution_count": 5, - "outputs": [] + { + "cell_type": "code", + "source": [ + "from typing import Any, List, Optional\n", + "import os\n", + "from adalflow.core import Component, Generator, Embedder, Sequential\n", + "from adalflow.core.types import Document, ModelClientType\n", + "from adalflow.core.string_parser import JsonParser\n", + "from adalflow.core.db import LocalDB\n", + "from adalflow.utils import setup_env\n", + "from adalflow.components.retriever.faiss_retriever import FAISSRetriever\n", + "from adalflow.components.data_process import (\n", + " RetrieverOutputToContextStr,\n", + " ToEmbeddings,\n", + " TextSplitter,\n", + ")\n", + "from adalflow.utils.global_config import get_adalflow_default_root_path" + ], + "metadata": { + "id": "V9LsGDnm3RbV" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "configs = {\n", + " \"embedder\": {\n", + " \"batch_size\": 100,\n", + " \"model_kwargs\": {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 256,\n", + " \"encoding_format\": \"float\",\n", + " },\n", + " },\n", + " \"retriever\": {\n", + " \"top_k\": 5,\n", + " },\n", + " \"generator\": {\n", + " \"model_client\": ModelClientType.OPENAI(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"temperature\": 0.3,\n", + " \"stream\": False,\n", + " },\n", + " },\n", + " \"text_splitter\": {\n", + " \"split_by\": \"word\",\n", + " \"chunk_size\": 400,\n", + " \"chunk_overlap\": 200,\n", + " },\n", + "}" + ], + "metadata": { + "id": "kWGTZxrw3Tli" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def prepare_data_pipeline():\n", + " splitter = TextSplitter(**configs[\"text_splitter\"])\n", + " embedder = Embedder(\n", + " model_client=ModelClientType.OPENAI(),\n", + " model_kwargs=configs[\"embedder\"][\"model_kwargs\"],\n", + " )\n", + " embedder_transformer = ToEmbeddings(\n", + " embedder=embedder, 
batch_size=configs[\"embedder\"][\"batch_size\"]\n", + " )\n", + " data_transformer = Sequential(splitter, embedder_transformer)\n", + " return data_transformer\n", + "\n", + "\n", + "def prepare_database_with_index(\n", + " docs: List[Document],\n", + " index_file: str = \"index.faiss\",\n", + " index_path: Optional[str] = None,\n", + "):\n", + " index_path = index_path or get_adalflow_default_root_path()\n", + " index_path = os.path.join(index_path, index_file)\n", + " if os.path.exists(index_path):\n", + " return None\n", + " db = LocalDB()\n", + " db.load(docs)\n", + " data_transformer = prepare_data_pipeline()\n", + " db.transform(data_transformer, key=\"data_transformer\")\n", + " db.save_state(index_path)" + ], + "metadata": { + "id": "1QE0PCKs4BLz" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "RAG_PROMPT_TEMPLATE = r\"\"\"\n", + "{{task_desc}}\n", + "\n", + "\n", + "{{input_str}}\n", + "{{context_str}}\n", + "\n", + "\"\"\"\n", + "\n", + "rag_prompt_task_desc = r\"\"\"\n", + "You are a helpful assistant.\n", + "\n", + "Your task is to answer the query that may or may not come with context information.\n", + "When context is provided, you should stick to the context and less on your prior knowledge to answer the query.\n", + "\n", + "Output JSON format:\n", + "{\n", + " \"answer\": \"The answer to the query\",\n", + "}\"\"\"\n", + "\n", + "\n", + "class RAG(Component):\n", + " def __init__(\n", + " self,\n", + " index_file: str = \"index.faiss\",\n", + " index_path: Optional[str] = None,\n", + " configs: dict = configs,\n", + " ):\n", + " super().__init__()\n", + "\n", + " index_path = index_path or get_adalflow_default_root_path()\n", + " index_path = os.path.join(index_path, index_file)\n", + " self.index_path = index_path\n", + "\n", + " if not os.path.exists(index_path):\n", + " self.db = LocalDB()\n", + " self.register_data_transformer()\n", + " self.transformed_docs = []\n", + " else:\n", + " self.db = 
LocalDB.load_state(index_path)\n", + " self.transformed_docs = self.db.get_transformed_data(\"data_transformer\")\n", + "\n", + " embedder = Embedder(\n", + " model_client=ModelClientType.OPENAI(),\n", + " model_kwargs=configs[\"embedder\"][\"model_kwargs\"],\n", + " )\n", + "\n", + " self.retriever = FAISSRetriever(\n", + " **configs[\"retriever\"],\n", + " embedder=embedder,\n", + " documents=self.transformed_docs,\n", + " document_map_func=lambda doc: doc.vector,\n", + " )\n", + " self.retriever_output_processors = RetrieverOutputToContextStr(deduplicate=True)\n", + "\n", + " self.generator = Generator(\n", + " **configs[\"generator\"],\n", + " prompt_kwargs={\"task_desc_str\": rag_prompt_task_desc},\n", + " output_processors=JsonParser(),\n", + " )\n", + "\n", + " def register_data_transformer(self):\n", + " if \"data_transformer\" not in self.db.get_transformer_keys():\n", + " data_transformer = prepare_data_pipeline()\n", + " self.db.register_transformer(data_transformer, key=\"data_transformer\")\n", + " print(\"Data transformer registered\")\n", + "\n", + " def add_documents(self, docs: List[Document]):\n", + " self.db.extend(docs, apply_transformer=True)\n", + " self.db.save_state(self.index_path)\n", + "\n", + " def get_transformed_docs(self, filter_func=None):\n", + " return self.db.get_transformed_data(\"data_transformer\", filter_func)\n", + "\n", + " def prepare_retriever(self, filter_func=None):\n", + " self.transformed_docs = self.get_transformed_docs(filter_func)\n", + " self.retriever.build_index_from_documents(\n", + " self.transformed_docs, document_map_func=lambda doc: doc.vector\n", + " )\n", + "\n", + " def generate(self, query: str, context: Optional[str] = None) -> Any:\n", + " if not self.generator:\n", + " raise ValueError(\"Generator is not set\")\n", + " prompt_kwargs = {\"context_str\": context, \"input_str\": query}\n", + " response = self.generator(prompt_kwargs=prompt_kwargs)\n", + " return response, context\n", + "\n", + " def 
call(self, query: str, verbose: bool = False) -> Any:\n", + " retrieved_documents = self.retriever(query)\n", + " for i, retriever_output in enumerate(retrieved_documents):\n", + " retrieved_documents[i].documents = [\n", + " self.transformed_docs[doc_index]\n", + " for doc_index in retriever_output.doc_indices\n", + " ]\n", + " if verbose:\n", + " print(f\"retrieved_documents: \\n {retrieved_documents}\")\n", + "\n", + " context_str = self.retriever_output_processors(retrieved_documents)\n", + " if verbose:\n", + " print(f\"context_str: \\n {context_str}\")\n", + "\n", + " return self.generate(query, context=context_str)" + ], + "metadata": { + "id": "6Mu1HXhy4DIG" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Prepare initial documents\n", + "doc1 = Document(\n", + " meta_data={\"title\": \"Li Yin's profile\"},\n", + " text=\"My name is Li Yin, I love rock climbing\" + \"lots of nonsense text\" * 500,\n", + " id=\"doc1\",\n", + ")\n", + "doc2 = Document(\n", + " meta_data={\"title\": \"Interviewing Li Yin\"},\n", + " text=\"lots of more nonsense text\" * 250\n", + " + \"Li Yin is an AI researcher and a software engineer\"\n", + " + \"lots of more nonsense text\" * 250,\n", + " id=\"doc2\",\n", + ")\n", + "\n", + "# Prepare the database (only runs once)\n", + "prepare_database_with_index([doc1, doc2], index_file=\"index.faiss\")\n", + "\n", + "# Initialize RAG\n", + "rag = RAG(index_file=\"index.faiss\")\n", + "print(rag)\n", + "\n", + "# Query the RAG system\n", + "query = \"What is Li Yin's hobby and profession?\"\n", + "response = rag.call(query)\n", + "print(f\"Response: {response}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "sPnx4PY34D1j", + "outputId": "f66d6f1a-70bf-40e9-a160-591fcfdcbed3" + }, + "execution_count": 8, + "outputs": [ { - "cell_type": "code", - "source": [ - "def prepare_data_pipeline():\n", - " splitter = 
TextSplitter(**configs[\"text_splitter\"])\n", - " embedder = Embedder(\n", - " model_client=ModelClientType.OPENAI(),\n", - " model_kwargs=configs[\"embedder\"][\"model_kwargs\"],\n", - " )\n", - " embedder_transformer = ToEmbeddings(\n", - " embedder=embedder, batch_size=configs[\"embedder\"][\"batch_size\"]\n", - " )\n", - " data_transformer = Sequential(splitter, embedder_transformer)\n", - " return data_transformer\n", - "\n", - "def prepare_database_with_index(\n", - " docs: List[Document],\n", - " index_file: str = \"index.faiss\",\n", - " index_path: Optional[str] = None,\n", - "):\n", - " index_path = index_path or get_adalflow_default_root_path()\n", - " index_path = os.path.join(index_path, index_file)\n", - " if os.path.exists(index_path):\n", - " return None\n", - " db = LocalDB()\n", - " db.load(docs)\n", - " data_transformer = prepare_data_pipeline()\n", - " db.transform(data_transformer, key=\"data_transformer\")\n", - " db.save_state(index_path)\n" - ], - "metadata": { - "id": "1QE0PCKs4BLz" - }, - "execution_count": 6, - "outputs": [] + "output_type": "stream", + "name": "stderr", + "text": [ + "Splitting Documents in Batches: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:00<00:00, 109.58it/s]\n", + "Batch embedding documents: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:01<00:00, 1.33s/it]\n", + "Adding embeddings to documents from batch: 1it [00:00, 6462.72it/s]\n" + ] }, { - "cell_type": "code", - "source": [ - "RAG_PROMPT_TEMPLATE = r\"\"\"\n", - "{{task_desc}}\n", - "\n", - "\n", - "{{input_str}}\n", - "{{context_str}}\n", - "\n", - "\"\"\"\n", - "\n", - "rag_prompt_task_desc = r\"\"\"\n", - "You are a helpful assistant.\n", - "\n", - "Your task is to answer the query that may or may not come with context information.\n", - "When context is provided, you should stick to the context and less on your prior knowledge to answer the query.\n", - "\n", - "Output JSON format:\n", - "{\n", - " \"answer\": \"The answer to the query\",\n", - 
"}\"\"\"\n", - "\n", - "class RAG(Component):\n", - " def __init__(\n", - " self,\n", - " index_file: str = \"index.faiss\",\n", - " index_path: Optional[str] = None,\n", - " configs: dict = configs,\n", - " ):\n", - " super().__init__()\n", - "\n", - " index_path = index_path or get_adalflow_default_root_path()\n", - " index_path = os.path.join(index_path, index_file)\n", - " self.index_path = index_path\n", - "\n", - " if not os.path.exists(index_path):\n", - " self.db = LocalDB()\n", - " self.register_data_transformer()\n", - " self.transformed_docs = []\n", - " else:\n", - " self.db = LocalDB.load_state(index_path)\n", - " self.transformed_docs = self.db.get_transformed_data(\"data_transformer\")\n", - "\n", - " embedder = Embedder(\n", - " model_client=ModelClientType.OPENAI(),\n", - " model_kwargs=configs[\"embedder\"][\"model_kwargs\"],\n", - " )\n", - "\n", - " self.retriever = FAISSRetriever(\n", - " **configs[\"retriever\"],\n", - " embedder=embedder,\n", - " documents=self.transformed_docs,\n", - " document_map_func=lambda doc: doc.vector,\n", - " )\n", - " self.retriever_output_processors = RetrieverOutputToContextStr(deduplicate=True)\n", - "\n", - " self.generator = Generator(\n", - " **configs[\"generator\"],\n", - " prompt_kwargs={\"task_desc_str\": rag_prompt_task_desc},\n", - " output_processors=JsonParser(),\n", - " )\n", - "\n", - " def register_data_transformer(self):\n", - " if \"data_transformer\" not in self.db.get_transformer_keys():\n", - " data_transformer = prepare_data_pipeline()\n", - " self.db.register_transformer(data_transformer, key=\"data_transformer\")\n", - " print(\"Data transformer registered\")\n", - "\n", - " def add_documents(self, docs: List[Document]):\n", - " self.db.extend(docs, apply_transformer=True)\n", - " self.db.save_state(self.index_path)\n", - "\n", - " def get_transformed_docs(self, filter_func=None):\n", - " return self.db.get_transformed_data(\"data_transformer\", filter_func)\n", - "\n", - " def 
prepare_retriever(self, filter_func=None):\n", - " self.transformed_docs = self.get_transformed_docs(filter_func)\n", - " self.retriever.build_index_from_documents(\n", - " self.transformed_docs, document_map_func=lambda doc: doc.vector\n", - " )\n", - "\n", - " def generate(self, query: str, context: Optional[str] = None) -> Any:\n", - " if not self.generator:\n", - " raise ValueError(\"Generator is not set\")\n", - " prompt_kwargs = {\"context_str\": context, \"input_str\": query}\n", - " response = self.generator(prompt_kwargs=prompt_kwargs)\n", - " return response, context\n", - "\n", - " def call(self, query: str, verbose: bool = False) -> Any:\n", - " retrieved_documents = self.retriever(query)\n", - " for i, retriever_output in enumerate(retrieved_documents):\n", - " retrieved_documents[i].documents = [\n", - " self.transformed_docs[doc_index]\n", - " for doc_index in retriever_output.doc_indices\n", - " ]\n", - " if verbose:\n", - " print(f\"retrieved_documents: \\n {retrieved_documents}\")\n", - "\n", - " context_str = self.retriever_output_processors(retrieved_documents)\n", - " if verbose:\n", - " print(f\"context_str: \\n {context_str}\")\n", - "\n", - " return self.generate(query, context=context_str)\n" - ], - "metadata": { - "id": "6Mu1HXhy4DIG" - }, - "execution_count": 7, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Saved the state of the DB to /root/.adalflow/index.faiss\n", + "RAG(\n", + " (db): LocalDB(name='LocalDB', items=[Document(id=doc1, text='My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...', meta_data={'title': \"Li Yin's profile\"}, vector=[], parent_doc_id=None, order=None, score=None), Document(id=doc2, text='lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...', meta_data={'title': 'Interviewing Li Yin'}, vector=[], parent_doc_id=None, order=None, score=None)], 
transformed_items={'data_transformer': [Document(id=59f7f6ad-eb4c-4fdb-8d04-6dba1ee439bc, text='My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=0, score=None), Document(id=2486725e-47ff-4978-84fc-7937778b0e45, text='textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nons...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=1, score=None), Document(id=96993047-4cff-436d-b8ac-e02da4ae7fec, text='nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlot...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=2, score=None), Document(id=77742f90-0c0c-4143-802d-3557577d4935, text='of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense text...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=3, score=None), Document(id=81ba770e-c5f2-4dc5-98fc-349ab9143ef9, text='textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nons...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=4, score=None), Document(id=dff6f5e3-5929-4e3c-ba5f-79f5116c1fa3, text='nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlot...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=5, score=None), Document(id=1e7888e2-0783-40b2-ab85-067e3ba71fad, text='of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense text...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=6, score=None), Document(id=2deb945f-dfb9-46d3-a60b-dae77e2f5fd8, text='lots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense ...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=0, score=None), Document(id=3d9c21aa-d583-47fe-b143-710b4bc4a8b2, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=1, score=None), Document(id=a318ffea-2542-4493-ab2d-03d10a94e860, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=2, score=None), Document(id=b5c05820-7545-43a8-a4a3-691c5ccc79d1, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=3, score=None), Document(id=a739cd3e-8826-4e74-afa9-499498115621, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=4, score=None), Document(id=7153cde2-b6ee-4485-91e9-9de2f4bd45ab, text='textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsens...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=5, score=None), Document(id=c3f3ed48-acc2-41b5-b4ac-a6107b651789, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=6, score=None), Document(id=7bfd84e6-0025-4cfa-8c0a-63c9de9a8d4a, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', 
parent_doc_id=doc2, order=7, score=None), Document(id=8bece98d-65f0-4dd1-9407-d1c54413bef4, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=8, score=None), Document(id=cf9ab236-af73-4af6-9302-b3c7ffdd9ca7, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=9, score=None)]}, transformer_setups={'data_transformer': Sequential(\n", + " (0): TextSplitter(split_by=word, chunk_size=400, chunk_overlap=200)\n", + " (1): ToEmbeddings(\n", + " batch_size=100\n", + " (embedder): Embedder(\n", + " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", + " (model_client): OpenAIClient()\n", + " )\n", + " (batch_embedder): BatchEmbedder(\n", + " (embedder): Embedder(\n", + " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + " )\n", + " )}, mapper_setups={}, index_path='/root/.adalflow/index.faiss')\n", + " (retriever): FAISSRetriever(\n", + " top_k=5, metric=prob, dimensions=256, total_documents=17\n", + " (embedder): Embedder(\n", + " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + " (retriever_output_processors): RetrieverOutputToContextStr(deduplicate=True)\n", + " (generator): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.3, 'stream': False}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(\n", + " template: \n", + " {# task desc #}\n", + " {% if task_desc_str %}\n", + " {{task_desc_str}}\n", + " {% else %}\n", + " You are a helpful assistant.\n", + " {% endif %}\n", + " 
{#input format#}\n", + " {% if input_format_str %}\n", + " \n", + " {{input_format_str}}\n", + " \n", + " {% endif %}\n", + " {# output format #}\n", + " {% if output_format_str %}\n", + " \n", + " {{output_format_str}}\n", + " \n", + " {% endif %}\n", + " {# tools #}\n", + " {% if tools_str %}\n", + " \n", + " {{tools_str}}\n", + " \n", + " {% endif %}\n", + " {# example #}\n", + " {% if examples_str %}\n", + " \n", + " {{examples_str}}\n", + " \n", + " {% endif %}\n", + " {# chat history #}\n", + " {% if chat_history_str %}\n", + " \n", + " {{chat_history_str}}\n", + " \n", + " {% endif %}\n", + " {#contex#}\n", + " {% if context_str %}\n", + " \n", + " {{context_str}}\n", + " \n", + " {% endif %}\n", + " \n", + " \n", + " {% if input_str %}\n", + " {{input_str}}\n", + " {% endif %}\n", + " \n", + " {# steps #}\n", + " {% if steps_str %}\n", + " \n", + " {{steps_str}}\n", + " \n", + " {% endif %}\n", + " , prompt_kwargs: {'task_desc_str': '\\nYou are a helpful assistant.\\n\\nYour task is to answer the query that may or may not come with context information.\\nWhen context is provided, you should stick to the context and less on your prior knowledge to answer the query.\\n\\nOutput JSON format:\\n{\\n \"answer\": \"The answer to the query\",\\n}'}, prompt_variables: ['examples_str', 'context_str', 'chat_history_str', 'tools_str', 'task_desc_str', 'input_str', 'input_format_str', 'output_format_str', 'steps_str']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): JsonParser()\n", + " )\n", + ")\n", + "Response: (GeneratorOutput(id=None, data={'answer': \"Li Yin's hobby is rock climbing and profession is an AI researcher and a software engineer.\"}, error=None, usage=CompletionUsage(completion_tokens=25, prompt_tokens=2713, total_tokens=2738), raw_response='{\\n \"answer\": \"Li Yin\\'s hobby is rock climbing and profession is an AI researcher and a software engineer.\"\\n}', metadata=None), ' My name is Li Yin, I love rock climbinglots 
of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense ')\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Add more documents at runtime\n", + "doc3 = Document(\n", + " meta_data={\"title\": \"Apple's profile\"},\n", + " text=\"Apple is a cute dog with black and tan fur\" + \"lots of nonsense text\" * 500,\n", + " id=\"doc3\",\n", + ")\n", + "doc4 = Document(\n", + " meta_data={\"title\": \"Apple's characteristics\"},\n", + " text=\"lots of more nonsense text\" * 250\n", + " + \"Apple is energetic, 
loves to play with her monkey toy\"\n", + " + \"lots of more nonsense text\" * 250,\n", + " id=\"doc4\",\n", + ")\n", + "\n", + "rag.add_documents([doc3, doc4])\n", + "rag.prepare_retriever()\n", + "\n", + "# Test a new query\n", + "query = \"What is Apple's favorite toy?\"\n", + "response = rag.call(query)\n", + "print(f\"Response: {response}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "bcC1-dCheVEC", + "outputId": "133bab3f-ff2e-40db-99dc-71d64af6283f" + }, + "execution_count": 9, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Prepare initial documents\n", - "doc1 = Document(\n", - " meta_data={\"title\": \"Li Yin's profile\"},\n", - " text=\"My name is Li Yin, I love rock climbing\" + \"lots of nonsense text\" * 500,\n", - " id=\"doc1\",\n", - ")\n", - "doc2 = Document(\n", - " meta_data={\"title\": \"Interviewing Li Yin\"},\n", - " text=\"lots of more nonsense text\" * 250\n", - " + \"Li Yin is an AI researcher and a software engineer\"\n", - " + \"lots of more nonsense text\" * 250,\n", - " id=\"doc2\",\n", - ")\n", - "\n", - "# Prepare the database (only runs once)\n", - "prepare_database_with_index([doc1, doc2], index_file=\"index.faiss\")\n", - "\n", - "# Initialize RAG\n", - "rag = RAG(index_file=\"index.faiss\")\n", - "print(rag)\n", - "\n", - "# Query the RAG system\n", - "query = \"What is Li Yin's hobby and profession?\"\n", - "response = rag.call(query)\n", - "print(f\"Response: {response}\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "sPnx4PY34D1j", - "outputId": "f66d6f1a-70bf-40e9-a160-591fcfdcbed3" - }, - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Splitting Documents in Batches: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:00<00:00, 109.58it/s]\n", - "Batch embedding documents: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:01<00:00, 1.33s/it]\n", - "Adding embeddings to documents from 
batch: 1it [00:00, 6462.72it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saved the state of the DB to /root/.adalflow/index.faiss\n", - "RAG(\n", - " (db): LocalDB(name='LocalDB', items=[Document(id=doc1, text='My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...', meta_data={'title': \"Li Yin's profile\"}, vector=[], parent_doc_id=None, order=None, score=None), Document(id=doc2, text='lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...', meta_data={'title': 'Interviewing Li Yin'}, vector=[], parent_doc_id=None, order=None, score=None)], transformed_items={'data_transformer': [Document(id=59f7f6ad-eb4c-4fdb-8d04-6dba1ee439bc, text='My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=0, score=None), Document(id=2486725e-47ff-4978-84fc-7937778b0e45, text='textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nons...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=1, score=None), Document(id=96993047-4cff-436d-b8ac-e02da4ae7fec, text='nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlot...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=2, score=None), Document(id=77742f90-0c0c-4143-802d-3557577d4935, text='of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense text...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=3, score=None), Document(id=81ba770e-c5f2-4dc5-98fc-349ab9143ef9, text='textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nons...', meta_data={'title': \"Li Yin's profile\"}, 
vector='len: 256', parent_doc_id=doc1, order=4, score=None), Document(id=dff6f5e3-5929-4e3c-ba5f-79f5116c1fa3, text='nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlot...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=5, score=None), Document(id=1e7888e2-0783-40b2-ab85-067e3ba71fad, text='of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense text...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=6, score=None), Document(id=2deb945f-dfb9-46d3-a60b-dae77e2f5fd8, text='lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=0, score=None), Document(id=3d9c21aa-d583-47fe-b143-710b4bc4a8b2, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=1, score=None), Document(id=a318ffea-2542-4493-ab2d-03d10a94e860, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=2, score=None), Document(id=b5c05820-7545-43a8-a4a3-691c5ccc79d1, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=3, score=None), Document(id=a739cd3e-8826-4e74-afa9-499498115621, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=4, score=None), Document(id=7153cde2-b6ee-4485-91e9-9de2f4bd45ab, text='textLi Yin is 
an AI researcher and a software engineerlots of more nonsense textlots of more nonsens...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=5, score=None), Document(id=c3f3ed48-acc2-41b5-b4ac-a6107b651789, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=6, score=None), Document(id=7bfd84e6-0025-4cfa-8c0a-63c9de9a8d4a, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=7, score=None), Document(id=8bece98d-65f0-4dd1-9407-d1c54413bef4, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=8, score=None), Document(id=cf9ab236-af73-4af6-9302-b3c7ffdd9ca7, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=9, score=None)]}, transformer_setups={'data_transformer': Sequential(\n", - " (0): TextSplitter(split_by=word, chunk_size=400, chunk_overlap=200)\n", - " (1): ToEmbeddings(\n", - " batch_size=100\n", - " (embedder): Embedder(\n", - " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", - " (model_client): OpenAIClient()\n", - " )\n", - " (batch_embedder): BatchEmbedder(\n", - " (embedder): Embedder(\n", - " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", - " (model_client): OpenAIClient()\n", - " )\n", - " )\n", - " )\n", - " )}, mapper_setups={}, index_path='/root/.adalflow/index.faiss')\n", - " (retriever): FAISSRetriever(\n", - " 
top_k=5, metric=prob, dimensions=256, total_documents=17\n", - " (embedder): Embedder(\n", - " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", - " (model_client): OpenAIClient()\n", - " )\n", - " )\n", - " (retriever_output_processors): RetrieverOutputToContextStr(deduplicate=True)\n", - " (generator): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.3, 'stream': False}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(\n", - " template: \n", - " {# task desc #}\n", - " {% if task_desc_str %}\n", - " {{task_desc_str}}\n", - " {% else %}\n", - " You are a helpful assistant.\n", - " {% endif %}\n", - " {#input format#}\n", - " {% if input_format_str %}\n", - " \n", - " {{input_format_str}}\n", - " \n", - " {% endif %}\n", - " {# output format #}\n", - " {% if output_format_str %}\n", - " \n", - " {{output_format_str}}\n", - " \n", - " {% endif %}\n", - " {# tools #}\n", - " {% if tools_str %}\n", - " \n", - " {{tools_str}}\n", - " \n", - " {% endif %}\n", - " {# example #}\n", - " {% if examples_str %}\n", - " \n", - " {{examples_str}}\n", - " \n", - " {% endif %}\n", - " {# chat history #}\n", - " {% if chat_history_str %}\n", - " \n", - " {{chat_history_str}}\n", - " \n", - " {% endif %}\n", - " {#contex#}\n", - " {% if context_str %}\n", - " \n", - " {{context_str}}\n", - " \n", - " {% endif %}\n", - " \n", - " \n", - " {% if input_str %}\n", - " {{input_str}}\n", - " {% endif %}\n", - " \n", - " {# steps #}\n", - " {% if steps_str %}\n", - " \n", - " {{steps_str}}\n", - " \n", - " {% endif %}\n", - " , prompt_kwargs: {'task_desc_str': '\\nYou are a helpful assistant.\\n\\nYour task is to answer the query that may or may not come with context information.\\nWhen context is provided, you should stick to the context and less on your prior knowledge to answer the query.\\n\\nOutput JSON format:\\n{\\n \"answer\": \"The answer to the query\",\\n}'}, prompt_variables: ['examples_str', 
'context_str', 'chat_history_str', 'tools_str', 'task_desc_str', 'input_str', 'input_format_str', 'output_format_str', 'steps_str']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): JsonParser()\n", - " )\n", - ")\n", - "Response: (GeneratorOutput(id=None, data={'answer': \"Li Yin's hobby is rock climbing and profession is an AI researcher and a software engineer.\"}, error=None, usage=CompletionUsage(completion_tokens=25, prompt_tokens=2713, total_tokens=2738), raw_response='{\\n \"answer\": \"Li Yin\\'s hobby is rock climbing and profession is an AI researcher and a software engineer.\"\\n}', metadata=None), ' My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots 
of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense ')\n" - ] - } - ] + "output_type": "stream", + "name": "stderr", + "text": [ + "Splitting Documents in Batches: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:00<00:00, 114.76it/s]\n", + "Batch embedding documents: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:00<00:00, 1.35it/s]\n", + "Adding embeddings to documents from batch: 1it [00:00, 1915.21it/s]\n" + ] }, { - "cell_type": "code", - "source": [ - "# Add more documents at runtime\n", - "doc3 = Document(\n", - " meta_data={\"title\": \"Apple's profile\"},\n", - " text=\"Apple is a cute dog with black and tan fur\" + \"lots of nonsense text\" * 500,\n", - " id=\"doc3\",\n", - ")\n", - "doc4 = Document(\n", - " meta_data={\"title\": \"Apple's characteristics\"},\n", - " text=\"lots of more nonsense text\" * 250\n", - " + \"Apple is energetic, loves to play with her monkey toy\"\n", - " + \"lots of more nonsense text\" * 250,\n", - " id=\"doc4\",\n", - ")\n", - "\n", - "rag.add_documents([doc3, doc4])\n", - "rag.prepare_retriever()\n", - "\n", - "# Test a new query\n", - "query = \"What is Apple's favorite toy?\"\n", - "response = rag.call(query)\n", - "print(f\"Response: {response}\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bcC1-dCheVEC", - "outputId": "133bab3f-ff2e-40db-99dc-71d64af6283f" - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Splitting Documents in Batches: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:00<00:00, 114.76it/s]\n", - "Batch embedding documents: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1/1 [00:00<00:00, 1.35it/s]\n", - "Adding embeddings to documents from batch: 1it [00:00, 1915.21it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": 
[ - "Saved the state of the DB to /root/.adalflow/index.faiss\n", - "Response: (GeneratorOutput(id=None, data={'answer': \"Apple's favorite toy is her monkey toy.\"}, error=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=2647, total_tokens=2663), raw_response='{\\n \"answer\": \"Apple\\'s favorite toy is her monkey toy.\"\\n}', metadata=None), ' Apple is a cute dog with black and tan furlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots textApple is energetic, loves to play with her monkey toylots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textApple is energetic, loves to play with her monkey toylots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of 
more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more ')\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Saved the state of the DB to /root/.adalflow/index.faiss\n", + "Response: (GeneratorOutput(id=None, data={'answer': \"Apple's favorite toy is her monkey toy.\"}, error=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=2647, total_tokens=2663), raw_response='{\\n \"answer\": \"Apple\\'s favorite toy is her monkey toy.\"\\n}', metadata=None), ' Apple is a cute dog with black and 
tan furlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots textApple is energetic, loves to play with her monkey toylots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textApple is energetic, loves to play with her monkey toylots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots 
of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more ')\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# View all documents in the database\n", + "print(\"All documents in the database:\")\n", + "for item in rag.db.items:\n", + " print(\n", + " f\"ID: {item.id}, Title: {item.meta_data['title']}, Text: {item.text[:100]}...\"\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "o9TzVv5GeZZ2", + "outputId": "bde56355-186c-4013-d702-b4530f82881b" + }, + "execution_count": 10, + "outputs": [ { - "cell_type": "code", - "source": [ - "# View all documents in the database\n", - "print(\"All documents in the database:\")\n", - "for item in rag.db.items:\n", - " print(f\"ID: {item.id}, Title: {item.meta_data['title']}, Text: {item.text[:100]}...\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "o9TzVv5GeZZ2", - 
"outputId": "bde56355-186c-4013-d702-b4530f82881b" - }, - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "All documents in the database:\n", - "ID: doc1, Title: Li Yin's profile, Text: My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...\n", - "ID: doc2, Title: Interviewing Li Yin, Text: lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...\n", - "ID: doc3, Title: Apple's profile, Text: Apple is a cute dog with black and tan furlots of nonsense textlots of nonsense textlots of nonsense...\n", - "ID: doc4, Title: Apple's characteristics, Text: lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "All documents in the database:\n", + "ID: doc1, Title: Li Yin's profile, Text: My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...\n", + "ID: doc2, Title: Interviewing Li Yin, Text: lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...\n", + "ID: doc3, Title: Apple's profile, Text: Apple is a cute dog with black and tan furlots of nonsense textlots of nonsense textlots of nonsense...\n", + "ID: doc4, Title: Apple's characteristics, Text: lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...\n" + ] } - ] + ] + } + ] } diff --git a/notebooks/tutorials/adalflow_text_splitter.ipynb b/notebooks/tutorials/adalflow_text_splitter.ipynb index 66fb81c7..4008f45a 100644 --- a/notebooks/tutorials/adalflow_text_splitter.ipynb +++ b/notebooks/tutorials/adalflow_text_splitter.ipynb @@ -31,7 +31,7 @@ "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + 
"os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -76,16 +76,11 @@ "from adalflow.core.types import Document\n", "\n", "# Configure the splitter settings\n", - "text_splitter = TextSplitter(\n", - " split_by=\"word\",\n", - " chunk_size=5,\n", - " chunk_overlap=1\n", - ")\n", + "text_splitter = TextSplitter(split_by=\"word\", chunk_size=5, chunk_overlap=1)\n", "\n", "# Example document\n", "doc = Document(\n", - " text=\"Example text. More example text. Even more text to illustrate.\",\n", - " id=\"doc1\"\n", + " text=\"Example text. More example text. Even more text to illustrate.\", id=\"doc1\"\n", ")\n", "\n", "# Execute the splitting\n", @@ -135,18 +130,13 @@ "from adalflow.core.types import Document\n", "\n", "# Configure the splitter settings\n", - "text_splitter = TextSplitter(\n", - " split_by=\"token\",\n", - " chunk_size=5,\n", - " chunk_overlap=0\n", - ")\n", + "text_splitter = TextSplitter(split_by=\"token\", chunk_size=5, chunk_overlap=0)\n", "\n", "doc = Document(\n", - " text=\"Example text. More example text. Even more text to illustrate.\",\n", - " id = \"doc1\"\n", - " )\n", + " text=\"Example text. More example text. 
Even more text to illustrate.\", id=\"doc1\"\n", + ")\n", "\n", - "splitted_docs = (text_splitter.call(documents=[doc]))\n", + "splitted_docs = text_splitter.call(documents=[doc])\n", "\n", "for doc in splitted_docs:\n", " print(doc)" diff --git a/notebooks/tutorials/adalflow_tracing.ipynb b/notebooks/tutorials/adalflow_tracing.ipynb index 014c1b5e..ef3d2b25 100644 --- a/notebooks/tutorials/adalflow_tracing.ipynb +++ b/notebooks/tutorials/adalflow_tracing.ipynb @@ -1,183 +1,184 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Tracing\n", - "\n", - "In particular, we provide two tracing methods to help you develop and improve the Generator:\n", - "\n", - "1. Trace the history change(states) on prompt during your development process. 
Developers typically go through a long process of prompt optimization and it is frustrating to lose track of the prompt changes when your current change actually makes the performance much worse.\n" - ], - "metadata": { - "id": "lLGpv1fLLIjF" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "sfKEfaYC3Go7" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,faiss-cpu]\n", - "\n", - "clear_output()\n" - ] - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - "\n", - "print(\"API keys have been set.\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-4c_AGBt3PlR", - "outputId": "85aba038-ee9c-463d-bdbd-027cbfff0094" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: ยทยทยทยทยทยทยทยทยทยท\n", - "Please enter your GROQ API key: ยทยทยทยทยทยทยทยทยทยท\n", - "API keys have been set.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "We created a GeneratorStateLogger to handle the logging and saving into json files. To further simplify developersโ€™s process, we provides a class decorator trace_generator_states where a single line of code can be added to any of your task component. It will automatically track any attributes of type Generator." 
- ], - "metadata": { - "id": "yWi2uEiE6UIf" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.tracing import trace_generator_states\n", - "from adalflow.core import Component, Generator\n", - "import adalflow as adal\n", - "from adalflow.components.model_client import OpenAIClient\n", - "\n", - "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"\n", - "\n", - "@trace_generator_states()\n", - "class DocQA(adal.Component):\n", - " def __init__(self):\n", - " super(DocQA, self).__init__()\n", - " self.generator = Generator(\n", - " template=template_doc,\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={\"model\": \"gpt-4o-mini\"},\n", - " )\n", - "\n", - " def call(self, query: str) -> str:\n", - " return self.doc(prompt_kwargs={\"input_str\": query}).data\n" - ], - "metadata": { - "id": "qk9pkcCVzdek" - }, - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Here is the folder structer of where the trace is generated as a .json file and also an example output below" - ], - "metadata": { - "id": "LAZUSnYn-lnI" - } - }, - { - "cell_type": "markdown", - "source": [ - "![image.png]()" - ], - "metadata": { - "id": "cVofNXVW-EMo" - } + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Tracing\n", + "\n", + "In particular, we provide two tracing methods to help you develop and improve the Generator:\n", + "\n", + "1. Trace the history change(states) on prompt during your development process. 
Developers typically go through a long process of prompt optimization and it is frustrating to lose track of the prompt changes when your current change actually makes the performance much worse.\n" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "-4c_AGBt3PlR", + "outputId": "85aba038-ee9c-463d-bdbd-027cbfff0094" + }, + "execution_count": 2, + "outputs": [ { - "cell_type": "code", - "source": [ - "'''\n", - " {\n", - " \"doc\": [\n", - " {\n", - " \"prompt_states\": {\n", - " \"type\": \"Prompt\",\n", - " \"data\": {\n", - " \"_components\": {\n", - " \"_ordered_dict\": true,\n", - " \"data\": []\n", - " },\n", - " \"_parameters\": {\n", - " \"_ordered_dict\": true,\n", - " \"data\": []\n", - " },\n", - " \"training\": false,\n", - " \"teacher_mode\": false,\n", - " \"tracing\": false,\n", - " \"name\": \"Prompt\",\n", - " \"_init_args\": {\n", - " \"template\": null,\n", - " \"prompt_kwargs\": {}\n", - " },\n", - " \"template\": \" You are a doctor User: {{input_str}}\",\n", - " \"prompt_variables\": [\n", - " \"input_str\"\n", - " ],\n", - " \"prompt_kwargs\": {}\n", - " }\n", - " },\n", - " \"time_stamp\": 
\"2024-11-29T12:36:33.302956\"\n", - " }\n", - " ]\n", - "}\n", - "'''" - ], - "metadata": { - "id": "dPd9i6_t7ERJ" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "Please enter your GROQ API key: ··········\n", + "API keys have been set.\n" + ] } - ] + ] + }, + { + "cell_type": "markdown", + "source": [ + "We created a GeneratorStateLogger to handle the logging and saving into json files. To further simplify the developer’s process, we provide a class decorator trace_generator_states where a single line of code can be added to any of your task components. It will automatically track any attributes of type Generator." + ], + "metadata": { + "id": "yWi2uEiE6UIf" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.tracing import trace_generator_states\n", + "from adalflow.core import Component, Generator\n", + "import adalflow as adal\n", + "from adalflow.components.model_client import OpenAIClient\n", + "\n", + "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"\n", + "\n", + "\n", + "@trace_generator_states()\n", + "class DocQA(adal.Component):\n", + " def __init__(self):\n", + " super(DocQA, self).__init__()\n", + " self.generator = Generator(\n", + " template=template_doc,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-4o-mini\"},\n", + " )\n", + "\n", + " def call(self, query: str) -> str:\n", + " return self.doc(prompt_kwargs={\"input_str\": query}).data" + ], + "metadata": { + "id": "qk9pkcCVzdek" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Here is the folder structure of where the trace is generated as a .json file and also an example output below" + ], + "metadata": { + "id": "LAZUSnYn-lnI" + } + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "cVofNXVW-EMo" + } + }, + {
+ "cell_type": "code", + "source": [ + "\"\"\"\n", + " {\n", + " \"doc\": [\n", + " {\n", + " \"prompt_states\": {\n", + " \"type\": \"Prompt\",\n", + " \"data\": {\n", + " \"_components\": {\n", + " \"_ordered_dict\": true,\n", + " \"data\": []\n", + " },\n", + " \"_parameters\": {\n", + " \"_ordered_dict\": true,\n", + " \"data\": []\n", + " },\n", + " \"training\": false,\n", + " \"teacher_mode\": false,\n", + " \"tracing\": false,\n", + " \"name\": \"Prompt\",\n", + " \"_init_args\": {\n", + " \"template\": null,\n", + " \"prompt_kwargs\": {}\n", + " },\n", + " \"template\": \" You are a doctor User: {{input_str}}\",\n", + " \"prompt_variables\": [\n", + " \"input_str\"\n", + " ],\n", + " \"prompt_kwargs\": {}\n", + " }\n", + " },\n", + " \"time_stamp\": \"2024-11-29T12:36:33.302956\"\n", + " }\n", + " ]\n", + "}\n", + "\"\"\"" + ], + "metadata": { + "id": "dPd9i6_t7ERJ" + }, + "execution_count": null, + "outputs": [] + } + ] } diff --git a/tutorials/database.ipynb b/tutorials/database.ipynb index 8744b1b0..2db749f7 100644 --- a/tutorials/database.ipynb +++ b/tutorials/database.ipynb @@ -15,26 +15,26 @@ "outputs": [], "source": [ "# setup data needed for the notes\n", - "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", - "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", + "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", + "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", "\n", - "org_documents =[\n", + "org_documents = [\n", " {\n", " \"title\": \"The Impact of Renewable Energy on the Economy\",\n", - " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. 
The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\"\n", + " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\",\n", " },\n", " {\n", " \"title\": \"Understanding Solar Panels\",\n", - " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\"\n", + " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\",\n", " },\n", " {\n", " \"title\": \"Pros and Cons of Solar Energy\",\n", - " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\"\n", + " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. 
The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\",\n", " },\n", " {\n", - " \"title\": \"Renewable Energy and Its Effects\",\n", - " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.\"\n", - " }\n", + " \"title\": \"Renewable Energy and Its Effects\",\n", + " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.\",\n", + " },\n", "]\n", "\n", "turns = [\n", @@ -42,14 +42,14 @@ " \"user\": \"What are the benefits of renewable energy?\",\n", " \"system\": \"I can see you are interested in renewable energy. Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\",\n", " \"user_time\": \"2021-09-01T12:00:00Z\",\n", - " \"system_time\": \"2021-09-01T12:00:01Z\"\n", + " \"system_time\": \"2021-09-01T12:00:01Z\",\n", " },\n", " {\n", " \"user\": \"How do solar panels impact the environment?\",\n", " \"system\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. 
Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\",\n", " \"user_time\": \"2021-09-01T12:00:02Z\",\n", - " \"system_time\": \"2021-09-01T12:00:03Z\"\n", - " }\n", + " \"system_time\": \"2021-09-01T12:00:03Z\",\n", + " },\n", "]" ] }, @@ -78,7 +78,10 @@ "from adalflow.core.types import Document\n", "\n", "# we will save the content to text and title in the meta_data\n", - "documents = [Document(text=doc['content'], meta_data={'title': doc['title']}) for doc in org_documents]\n", + "documents = [\n", + " Document(text=doc[\"content\"], meta_data={\"title\": doc[\"title\"]})\n", + " for doc in org_documents\n", + "]\n", "print(documents)" ] }, @@ -151,7 +154,7 @@ "source": [ "# prepare the data pipeline\n", "\n", - "from adalflow.core.embedder import Embedder \n", + "from adalflow.core.embedder import Embedder\n", "from adalflow.core.types import ModelClientType\n", "from adalflow.components.data_process import DocumentSplitter, ToEmbeddings\n", "from adalflow.core.component import Sequential\n", @@ -163,14 +166,10 @@ " \"encoding_format\": \"float\",\n", "}\n", "\n", - "splitter_config = {\n", - " \"split_by\": \"word\",\n", - " \"split_length\": 50,\n", - " \"split_overlap\": 10\n", - "}\n", + "splitter_config = {\"split_by\": \"word\", \"split_length\": 50, \"split_overlap\": 10}\n", "\n", "splitter = DocumentSplitter(**splitter_config)\n", - "embedder = Embedder(model_client =ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", + "embedder = Embedder(model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", "embedder_transformer = ToEmbeddings(embedder, batch_size=2)\n", "data_transformer = Sequential(splitter, embedder_transformer)\n", "print(data_transformer)" @@ -185,13 +184,19 @@ "# prepare mapping functions to map the data to Document object for the pipeline\n", "\n", "from typing import Dict\n", + "\n", + "\n", "# 
mapping function for org_documents\n", "def map_to_document(doc: Dict) -> Document:\n", - " return Document(text=doc['content'], meta_data={'title': doc['title']})\n", + " return Document(text=doc[\"content\"], meta_data={\"title\": doc[\"title\"]})\n", + "\n", "\n", "def map_dialogturn_to_document(turn: DialogTurn) -> Document:\n", " # it can be important to keep the original data's id\n", - " return Document(id=turn.id, text=turn.user_query.query_str + ' ' + turn.assistant_response.response_str)" + " return Document(\n", + " id=turn.id,\n", + " text=turn.user_query.query_str + \" \" + turn.assistant_response.response_str,\n", + " )" ] }, { @@ -304,7 +309,7 @@ "# create a db for the dialog_turns\n", "from adalflow.core.db import LocalDB\n", "\n", - "dialog_turn_db = LocalDB('dialog_turns')\n", + "dialog_turn_db = LocalDB(\"dialog_turns\")\n", "print(dialog_turn_db)\n", "\n", "dialog_turn_db.load(dialog_turns)\n", @@ -397,7 +402,7 @@ ], "source": [ "# save the state of the dialog_turn_db\n", - "dialog_turn_db.save_state('dialog_turn_db_state.pkl')\n", + "dialog_turn_db.save_state(\"dialog_turn_db_state.pkl\")\n", "\n", "print(dialog_turn_db)" ] @@ -432,8 +437,8 @@ ], "source": [ "# restore the state of the restored_dialog_turn_db\n", - "restored_dialog_turn_db = LocalDB.load_state('dialog_turn_db_state.pkl')\n", - "print(restored_dialog_turn_db)\n" + "restored_dialog_turn_db = LocalDB.load_state(\"dialog_turn_db_state.pkl\")\n", + "print(restored_dialog_turn_db)" ] }, { @@ -537,18 +542,15 @@ } ], "source": [ - "# prepare the generator for the dialog turns \n", + "# prepare the generator for the dialog turns\n", "\n", "from adalflow.core import Generator\n", "\n", - "llm_kwargs = {\n", - " \"model\": \"gpt-3.5-turbo\"\n", - "}\n", + "llm_kwargs = {\"model\": \"gpt-3.5-turbo\"}\n", "\n", "# we will use the default prompt, and using input_str and chat_history_str for the final prompt\n", - "generator = Generator(model_client = ModelClientType.OPENAI(), 
model_kwargs=llm_kwargs)\n", - "print(generator)\n", - "\n" + "generator = Generator(model_client=ModelClientType.OPENAI(), model_kwargs=llm_kwargs)\n", + "print(generator)" ] }, { @@ -614,16 +616,18 @@ ], "source": [ "# lets see how the prompt will be if we pass the input_str and chat_history_str\n", - "input_str = \"What are the benefits of renewable energy? Did I ask this before?\" \n", + "input_str = \"What are the benefits of renewable energy? Did I ask this before?\"\n", + "\n", "\n", "def format_chat_history_str(turns: list) -> str:\n", " chat_history_str = []\n", " for turn in turns:\n", - " chat_history_str.append(turn.to_yaml()) # format as yaml\n", + " chat_history_str.append(turn.to_yaml()) # format as yaml\n", " # join with newline\n", - " chat_history_str = '\\n_________\\n'.join(chat_history_str)\n", + " chat_history_str = \"\\n_________\\n\".join(chat_history_str)\n", " return chat_history_str\n", "\n", + "\n", "chat_history_str = format_chat_history_str(dialog_turns)\n", "print(generator.print_prompt(input_str=input_str, chat_history_str=chat_history_str))" ] @@ -663,31 +667,33 @@ } ], "source": [ - "# as we have quite a bit of empty fields, lets exclude them \n", + "# as we have quite a bit of empty fields, lets exclude them\n", "from typing import List\n", "\n", - "input_str = \"What are the benefits of renewable energy? Did I ask this before?\" \n", + "input_str = \"What are the benefits of renewable energy? 
Did I ask this before?\"\n", + "\n", "\n", "def format_chat_history_str(turns: List[DialogTurn]) -> str:\n", " chat_history_str = []\n", " for turn in turns:\n", " chat_history_str.append(\n", - " turn.to_yaml(\n", - " exclude=[\n", - " \"id\",\n", - " \"user_id\",\n", - " \"session_id\",\n", - " \"user_query_timestamp\",\n", - " \"assistant_response_timestamp\",\n", - " \"order\",\n", - " \"metadata\",\n", - " \"vector\",\n", - " ],\n", - " )\n", - " ) \n", - " chat_history_str = '\\n_________\\n'.join(chat_history_str)\n", + " turn.to_yaml(\n", + " exclude=[\n", + " \"id\",\n", + " \"user_id\",\n", + " \"session_id\",\n", + " \"user_query_timestamp\",\n", + " \"assistant_response_timestamp\",\n", + " \"order\",\n", + " \"metadata\",\n", + " \"vector\",\n", + " ],\n", + " )\n", + " )\n", + " chat_history_str = \"\\n_________\\n\".join(chat_history_str)\n", " return chat_history_str\n", "\n", + "\n", "chat_history_str = format_chat_history_str(dialog_turn_db.items[0:1])\n", "print(generator.print_prompt(input_str=input_str, chat_history_str=chat_history_str))" ] @@ -861,6 +867,7 @@ "source": [ "# we will use the retriever to find top_k chunked documents, from its partent_doc_id, we will find the initial dialog_turn, and feed that to the generator\n", "from adalflow.utils.logger import get_logger\n", + "\n", "get_logger()\n", "\n", "embeddings = [item.vector for item in dialog_turn_db.transformed_items[key]]\n", @@ -868,7 +875,7 @@ "retriever.build_index_from_documents(documents=embeddings)\n", "\n", "# top_k_documents = retriever(input=input_str)\n", - "# print(top_k_documents)\n" + "# print(top_k_documents)" ] }, { @@ -907,7 +914,12 @@ "source": [ "# get the parent_doc_id from the top_k_documents\n", "\n", - "parent_doc_ids = set([dialog_turn_db.transformed_items[key][doc_index].parent_doc_id for doc_index in top_k_documents[0].doc_indices])\n", + "parent_doc_ids = set(\n", + " [\n", + " dialog_turn_db.transformed_items[key][doc_index].parent_doc_id\n", + " for 
doc_index in top_k_documents[0].doc_indices\n", + " ]\n", + ")\n", "print(parent_doc_ids)" ] }, @@ -917,7 +929,9 @@ "metadata": {}, "outputs": [], "source": [ - "fetched_dialog_turns=dialog_turn_db.fetch_items(condition=lambda x: x.id in parent_doc_ids)" + "fetched_dialog_turns = dialog_turn_db.fetch_items(\n", + " condition=lambda x: x.id in parent_doc_ids\n", + ")" ] }, { @@ -941,7 +955,9 @@ "source": [ "chat_history_str = format_chat_history_str(fetched_dialog_turns)\n", "\n", - "output = generator(prompt_kwargs={\"input_str\": input_str, \"chat_history_str\": chat_history_str})\n", + "output = generator(\n", + " prompt_kwargs={\"input_str\": input_str, \"chat_history_str\": chat_history_str}\n", + ")\n", "print(output)" ] } diff --git a/tutorials/dataclass.ipynb b/tutorials/dataclass.ipynb index e2631c2b..06be8204 100644 --- a/tutorials/dataclass.ipynb +++ b/tutorials/dataclass.ipynb @@ -8,23 +8,23 @@ "source": [ "from dataclasses import dataclass, field\n", "\n", + "\n", "@dataclass\n", "class Question:\n", - " question: str = field(\n", - " metadata={\"desc\": \"The question asked by the user\"}\n", - " )\n", + " question: str = field(metadata={\"desc\": \"The question asked by the user\"})\n", " metadata: dict = field(\n", " metadata={\"desc\": \"The metadata of the question\"}, default_factory=dict\n", " )\n", "\n", + "\n", "@dataclass\n", "class TrecData:\n", " question: Question = field(\n", " metadata={\"desc\": \"The question asked by the user\"}\n", - " ) # Required field, you have to provide the question field at the instantiation\n", + " ) # Required field, you have to provide the question field at the instantiation\n", " label: int = field(\n", " metadata={\"desc\": \"The label of the question\"}, default=0\n", - " ) # Optional field" + " ) # Optional field" ] }, { @@ -49,6 +49,7 @@ "print(example)\n", "\n", "from dataclasses import asdict\n", + "\n", "print(asdict(example))\n", "reconstructed = TrecData(**asdict(example))\n", 
"print(reconstructed)\n", @@ -97,22 +98,24 @@ "metadata": {}, "outputs": [], "source": [ - "# lets see what DataClass can do \n", + "# lets see what DataClass can do\n", "# 1. allow required field after optional field using required_field on default_factory\n", "\n", "from adalflow.core import DataClass, required_field\n", "\n", + "\n", "@dataclass\n", "class TrecData2(DataClass):\n", " question: Question = field(\n", " metadata={\"desc\": \"The question asked by the user\"}\n", - " ) # Required field, you have to provide the question field at the instantiation\n", + " ) # Required field, you have to provide the question field at the instantiation\n", " label: int = field(\n", " metadata={\"desc\": \"The label of the question\"}, default=0\n", - " ) # Optional field\n", + " ) # Optional field\n", " metadata: dict = field(\n", - " metadata={\"desc\": \"The metadata of the question\"}, default_factory=required_field()\n", - " ) # required field" + " metadata={\"desc\": \"The metadata of the question\"},\n", + " default_factory=required_field(),\n", + " ) # required field" ] }, { @@ -140,10 +143,10 @@ "example = TrecData2(Question(\"What is the capital of France?\"), 1, {\"key\": \"value\"})\n", "print(example)\n", "\n", - "dict_example = TrecData2.to_dict(example) # use as if its a class method\n", + "dict_example = TrecData2.to_dict(example) # use as if its a class method\n", "print(dict_example)\n", "\n", - "dict_example_2 = example.to_dict() # use it as instance method\n", + "dict_example_2 = example.to_dict() # use it as instance method\n", "print(dict_example)\n", "\n", "reconstructed = TrecData2.from_dict(dict_example)\n", @@ -178,7 +181,9 @@ "print(dict_exclude)\n", "\n", "# exclude field of the parent and child class\n", - "dict_exclude = example.to_dict(exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]})\n", + "dict_exclude = example.to_dict(\n", + " exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]}\n", + ")\n", 
"print(dict_exclude)" ] }, @@ -271,8 +276,7 @@ "print(example_str)\n", "\n", "example_str = example.format_example_str(DataClassFormatType.EXAMPLE_YAML)\n", - "print(example_str)\n", - "\n" + "print(example_str)" ] }, { @@ -298,7 +302,7 @@ ], "source": [ "# Now, lets check the data format using class method without instance\n", - "# schema, you can choose to only use properties \n", + "# schema, you can choose to only use properties\n", "\n", "schema = TrecData2.to_schema()\n", "schema" @@ -326,7 +330,9 @@ ], "source": [ "# schema with exclude\n", - "schema_exclude = TrecData2.to_schema(exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]})\n", + "schema_exclude = TrecData2.to_schema(\n", + " exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]}\n", + ")\n", "schema_exclude" ] }, @@ -373,7 +379,9 @@ "source": [ "# exclude field of the parent and child class\n", "\n", - "json_signature_exclude = TrecData2.to_json_signature(exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]})\n", + "json_signature_exclude = TrecData2.to_json_signature(\n", + " exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]}\n", + ")\n", "print(json_signature_exclude)" ] }, @@ -464,7 +472,7 @@ } ], "source": [ - "# use the DataClassFormatType to control it \n", + "# use the DataClassFormatType to control it\n", "\n", "from adalflow.core import DataClassFormatType\n", "\n", @@ -494,6 +502,8 @@ "source": [ "# load with customizd from dict\n", "from typing import Dict\n", + "\n", + "\n", "@dataclass\n", "class OutputFormat(DataClass):\n", " thought: str = field(\n", @@ -521,6 +531,7 @@ " }\n", " return super().from_dict(data)\n", "\n", + "\n", "data = OutputFormat.from_dict({\"coarse_label\": 1})\n", "print(data)" ] diff --git a/tutorials/embedder.ipynb b/tutorials/embedder.ipynb index 29625454..c5c60527 100644 --- a/tutorials/embedder.ipynb +++ b/tutorials/embedder.ipynb @@ -148,6 +148,7 @@ "source": [ "from adalflow.core.embedder 
import Embedder\n", "from adalflow.components.model_client import TransformersClient\n", + "\n", "# from adalflow.utils import enable_library_logging\n", "\n", "# enable_library_logging(level=\"DEBUG\")\n", @@ -508,8 +509,10 @@ "from typing import List\n", "from adalflow.core.component import Component\n", "from copy import deepcopy\n", + "\n", + "\n", "class DecreaseEmbeddingDim(Component):\n", - " def __init__(self, old_dim: int, new_dim: int, normalize: bool = True):\n", + " def __init__(self, old_dim: int, new_dim: int, normalize: bool = True):\n", " super().__init__()\n", " self.old_dim = old_dim\n", " self.new_dim = new_dim\n", @@ -525,7 +528,7 @@ " new_embedding = normalize_vector(new_embedding)\n", " embedding.embedding = new_embedding\n", " return output\n", - " \n", + "\n", " def _extra_repr(self) -> str:\n", " repr_str = f\"old_dim={self.old_dim}, new_dim={self.new_dim}, normalize={self.normalize}\"\n", " return repr_str" diff --git a/tutorials/generator.ipynb b/tutorials/generator.ipynb index e8a3fac2..bc93b021 100644 --- a/tutorials/generator.ipynb +++ b/tutorials/generator.ipynb @@ -49,10 +49,10 @@ "\n", "enable_library_logging(level=\"DEBUG\")\n", "\n", - "model_kwargs={\n", + "model_kwargs = {\n", " \"model\": \"gpt-3.5-turbo\",\n", " \"logprobs\": True,\n", - " \"n\": 2, # the number of chat completion choices\n", + " \"n\": 2, # the number of chat completion choices\n", "}\n", "model_client = OpenAIClient(chat_completion_parser=get_probabilities)\n", "generator = Generator(model_client=model_client, model_kwargs=model_kwargs)\n", @@ -80,7 +80,7 @@ "source": [ "from adalflow.core import Component, Generator\n", "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # noqa\n", + "from adalflow.utils import setup_env # noqa\n", "\n", "\n", "class SimpleQA(Component):\n", @@ -93,7 +93,9 @@ " You:\n", " \"\"\"\n", " self.generator = Generator(\n", - " model_client=GroqAPIClient(), 
model_kwargs={\"model\": \"llama3-8b-8192\"}, template=template\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " template=template,\n", " )\n", "\n", " def call(self, query):\n", diff --git a/tutorials/model_client.ipynb b/tutorials/model_client.ipynb index 3e5b7b06..b61e7ec7 100644 --- a/tutorials/model_client.ipynb +++ b/tutorials/model_client.ipynb @@ -36,9 +36,9 @@ "\n", "prompt = f\"User: {query}\\n\"\n", "model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.5, \"max_tokens\": 100}\n", - "api_kwargs = openai_client.convert_inputs_to_api_kwargs(input=prompt, \n", - " model_kwargs=model_kwargs, \n", - " model_type=model_type)\n", + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=prompt, model_kwargs=model_kwargs, model_type=model_type\n", + ")\n", "print(f\"api_kwargs: {api_kwargs}\")\n", "\n", "response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", @@ -49,15 +49,20 @@ "model_type = ModelType.EMBEDDER\n", "# do batch embedding\n", "input = [query] * 2\n", - "model_kwargs = {\"model\": \"text-embedding-3-small\", \"dimensions\": 8, \"encoding_format\": \"float\"}\n", - "api_kwargs = openai_client.convert_inputs_to_api_kwargs(input=input, model_kwargs=model_kwargs, model_type=model_type)\n", + "model_kwargs = {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 8,\n", + " \"encoding_format\": \"float\",\n", + "}\n", + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=input, model_kwargs=model_kwargs, model_type=model_type\n", + ")\n", "print(f\"api_kwargs: {api_kwargs}\")\n", "\n", "\n", - "\n", "response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", "reponse_embedder_output = openai_client.parse_embedding_response(response)\n", - "print(f\"reponse_embedder_output: {reponse_embedder_output}\")\n" + "print(f\"reponse_embedder_output: {reponse_embedder_output}\")" ] }, { diff --git 
a/tutorials/rag.ipynb b/tutorials/rag.ipynb index 8892f0a2..b5163e51 100644 --- a/tutorials/rag.ipynb +++ b/tutorials/rag.ipynb @@ -16,11 +16,12 @@ "outputs": [], "source": [ "# the data pipeline and the backend data processing\n", - "from adalflow.core.embedder import Embedder \n", + "from adalflow.core.embedder import Embedder\n", "from adalflow.core.types import ModelClientType\n", "from adalflow.components.data_process import TextSplitter, ToEmbeddings\n", "from adalflow.core.container import Sequential\n", "\n", + "\n", "def prepare_data_pipeline():\n", " model_kwargs = {\n", " \"model\": \"text-embedding-3-small\",\n", @@ -28,14 +29,12 @@ " \"encoding_format\": \"float\",\n", " }\n", "\n", - " splitter_config = {\n", - " \"split_by\": \"word\",\n", - " \"split_length\": 50,\n", - " \"split_overlap\": 10\n", - " }\n", + " splitter_config = {\"split_by\": \"word\", \"split_length\": 50, \"split_overlap\": 10}\n", "\n", " splitter = TextSplitter(**splitter_config)\n", - " embedder = Embedder(model_client =ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", + " embedder = Embedder(\n", + " model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs\n", + " )\n", " embedder_transformer = ToEmbeddings(embedder, batch_size=2)\n", " data_transformer = Sequential(splitter, embedder_transformer)\n", " print(data_transformer)" diff --git a/tutorials/react_note.ipynb b/tutorials/react_note.ipynb index 0b647a4b..b1cc8bba 100644 --- a/tutorials/react_note.ipynb +++ b/tutorials/react_note.ipynb @@ -120,7 +120,7 @@ " test_react_agent(ModelClientType.OPENAI(), gpt_model_kwargs)\n", " print(\"Done\")\n", "\n", - " test_react_agent_use_examples(ModelClientType.GROQ(), llama3_model_kwargs)\n" + " test_react_agent_use_examples(ModelClientType.GROQ(), llama3_model_kwargs)" ] }, { @@ -134,6 +134,7 @@ "import asyncio\n", "import time\n", "\n", + "\n", "def is_running_in_event_loop() -> bool:\n", " try:\n", " loop = asyncio.get_running_loop()\n", @@ -143,7 +144,8 @@ " 
return False\n", " except RuntimeError:\n", " return False\n", - " \n", + "\n", + "\n", "def sync_func():\n", " time.sleep(1)\n", " print(\"Sync function\")\n", diff --git a/tutorials/retriever.ipynb b/tutorials/retriever.ipynb index c464f46b..859a6de8 100644 --- a/tutorials/retriever.ipynb +++ b/tutorials/retriever.ipynb @@ -23,26 +23,26 @@ "outputs": [], "source": [ "# decide a meaningful query and a list of documents\n", - "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", - "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", + "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", + "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", "\n", - "documents =[\n", + "documents = [\n", " {\n", " \"title\": \"The Impact of Renewable Energy on the Economy\",\n", - " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\"\n", + " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\",\n", " },\n", " {\n", " \"title\": \"Understanding Solar Panels\",\n", - " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. 
Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\"\n", + " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\",\n", " },\n", " {\n", " \"title\": \"Pros and Cons of Solar Energy\",\n", - " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\"\n", + " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\",\n", " },\n", " {\n", - " \"title\": \"Renewable Energy and Its Effects\",\n", - " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.\"\n", - " }\n", + " \"title\": \"Renewable Energy and Its Effects\",\n", + " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. 
However, the initial setup and material sourcing for these technologies can still have environmental impacts.\",\n", + " },\n", "]" ] }, @@ -67,7 +67,7 @@ ], "source": [ "# create an embedder\n", - "from adalflow.core.embedder import Embedder \n", + "from adalflow.core.embedder import Embedder\n", "from adalflow.core.types import ModelClientType\n", "\n", "\n", @@ -77,7 +77,7 @@ " \"encoding_format\": \"float\",\n", "}\n", "\n", - "embedder = Embedder(model_client =ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", + "embedder = Embedder(model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", "embedder" ] }, @@ -128,7 +128,7 @@ "\n", "from adalflow.components.retriever import FAISSRetriever\n", "\n", - "# pass the documents in the initialization \n", + "# pass the documents in the initialization\n", "documents_embeddings = [x.embedding for x in output.data]\n", "retriever = FAISSRetriever(top_k=2, embedder=embedder, documents=documents_embeddings)\n", "retriever" @@ -153,7 +153,7 @@ "# execute the retriever\n", "output_1 = retriever(input=query_1)\n", "output_2 = retriever(input=query_2)\n", - "output_3 = retriever(input = [query_1, query_2])\n", + "output_3 = retriever(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -198,7 +198,7 @@ "\n", "output_1 = retriever_1(input=query_1)\n", "output_2 = retriever_1(input=query_2)\n", - "output_3 = retriever_1(input = [query_1, query_2])\n", + "output_3 = retriever_1(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -229,7 +229,9 @@ "\n", "document_map_func = lambda x: x[\"content\"]\n", "\n", - "bm25_retriever = BM25Retriever(top_k=2, documents=documents, document_map_func=document_map_func)\n", + "bm25_retriever = BM25Retriever(\n", + " top_k=2, documents=documents, document_map_func=document_map_func\n", + ")\n", "print(bm25_retriever)" ] }, @@ -250,7 +252,10 @@ "source": [ "# show how a word splitter and a token 
splitter differs\n", "\n", - "from adalflow.components.retriever.bm25_retriever import split_text_by_word_fn_then_lower_tokenized, split_text_by_word_fn\n", + "from adalflow.components.retriever.bm25_retriever import (\n", + " split_text_by_word_fn_then_lower_tokenized,\n", + " split_text_by_word_fn,\n", + ")\n", "\n", "query_1_words = split_text_by_word_fn(query_1)\n", "query_1_tokens = split_text_by_word_fn_then_lower_tokenized(query_1)\n", @@ -277,7 +282,7 @@ "source": [ "output_1 = bm25_retriever(input=query_1)\n", "output_2 = bm25_retriever(input=query_2)\n", - "output_3 = bm25_retriever(input = [query_1, query_2])\n", + "output_3 = bm25_retriever(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -324,7 +329,7 @@ "\n", "output_1 = bm25_retriever(input=query_1_short)\n", "output_2 = bm25_retriever(input=query_2_short)\n", - "output_3 = bm25_retriever(input = [query_1_short, query_2_short])\n", + "output_3 = bm25_retriever(input=[query_1_short, query_2_short])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -351,11 +356,13 @@ "document_map_func = lambda x: x[\"title\"] + \" \" + x[\"content\"]\n", "\n", "print(documents)\n", - "bm25_retriever.build_index_from_documents(documents=documents, document_map_func=document_map_func)\n", + "bm25_retriever.build_index_from_documents(\n", + " documents=documents, document_map_func=document_map_func\n", + ")\n", "\n", "output_1 = bm25_retriever(input=query_1_short)\n", "output_2 = bm25_retriever(input=query_2_short)\n", - "output_3 = bm25_retriever(input = [query_1_short, query_2_short])\n", + "output_3 = bm25_retriever(input=[query_1_short, query_2_short])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -425,7 +432,9 @@ "source": [ "# build index and run queries\n", "document_map_func = lambda x: x[\"content\"]\n", - "reranker.build_index_from_documents(documents=documents, document_map_func=document_map_func)\n", + 
"reranker.build_index_from_documents(\n", + " documents=documents, document_map_func=document_map_func\n", + ")\n", "\n", "print(reranker)" ] @@ -449,7 +458,7 @@ "# run queries\n", "output_1 = reranker(input=query_1)\n", "output_2 = reranker(input=query_2)\n", - "output_3 = reranker(input = [query_1, query_2])\n", + "output_3 = reranker(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -484,7 +493,7 @@ " documents=documents,\n", " document_map_func=document_map_func,\n", ")\n", - "print(reranker)\n" + "print(reranker)" ] }, { @@ -495,6 +504,7 @@ "source": [ "# run queries\n", "import torch\n", + "\n", "# Set the number of threads for PyTorch, avoid segementation fault\n", "torch.set_num_threads(1)\n", "torch.set_num_interop_threads(1)" @@ -516,11 +526,9 @@ } ], "source": [ - "\n", - "\n", "output_1 = reranker(input=query_1)\n", "output_2 = reranker(input=query_2)\n", - "output_3 = reranker(input = [query_1, query_2])\n", + "output_3 = reranker(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -551,17 +559,19 @@ ], "source": [ "# try to use title this time\n", - "document_map_func = lambda x: x[\"title\"] + \" \" + x[\"content\"] # no \n", + "document_map_func = lambda x: x[\"title\"] + \" \" + x[\"content\"] # no\n", "\n", - "reranker.build_index_from_documents(documents=documents, document_map_func=document_map_func)\n", + "reranker.build_index_from_documents(\n", + " documents=documents, document_map_func=document_map_func\n", + ")\n", "\n", "# run queries\n", "output_1 = reranker(input=query_1)\n", "output_2 = reranker(input=query_2)\n", - "output_3 = reranker(input = [query_1, query_2])\n", + "output_3 = reranker(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", - "print(output_3)\n" + "print(output_3)" ] }, { @@ -617,12 +627,12 @@ "}\n", "document_map_func = lambda x: x[\"content\"]\n", "llm_retriever = LLMRetriever(\n", - " top_k=2, \n", - " 
model_client=model_client, \n", - " model_kwargs=model_kwargs, \n", - " documents=documents, \n", - " document_map_func=document_map_func\n", - " )\n", + " top_k=2,\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " documents=documents,\n", + " document_map_func=document_map_func,\n", + ")\n", "print(llm_retriever)" ] }, @@ -645,7 +655,7 @@ "# run queries\n", "output_1 = llm_retriever(input=query_1)\n", "output_2 = llm_retriever(input=query_2)\n", - "output_3 = llm_retriever(input = [query_1, query_2])\n", + "output_3 = llm_retriever(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -674,7 +684,7 @@ "}\n", "output_1 = llm_retriever(model_kwargs=model_kwargs, input=query_1)\n", "output_2 = llm_retriever(model_kwargs=model_kwargs, input=query_2)\n", - "output_3 = llm_retriever(model_kwargs=model_kwargs, input = [query_1, query_2])\n", + "output_3 = llm_retriever(model_kwargs=model_kwargs, input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -739,12 +749,14 @@ "import fsspec\n", "import os\n", "import time\n", + "\n", + "\n", "def get_local_file_metadata(file_path: str):\n", " stat = os.stat(file_path)\n", " return {\n", - " 'size': stat.st_size, # File size in bytes\n", - " 'creation_date': time.ctime(stat.st_ctime), # Creation time\n", - " 'last_modified_date': time.ctime(stat.st_mtime) # Last modification time\n", + " \"size\": stat.st_size, # File size in bytes\n", + " \"creation_date\": time.ctime(stat.st_ctime), # Creation time\n", + " \"last_modified_date\": time.ctime(stat.st_mtime), # Last modification time\n", " }\n", "\n", "\n", @@ -774,9 +786,9 @@ " Returns:\n", " str: The content of the text file.\n", " \"\"\"\n", - " with fsspec.open(file_path, 'r') as file:\n", + " with fsspec.open(file_path, \"r\") as file:\n", " content = file.read()\n", - " return content\n" + " return content" ] }, { @@ -804,8 +816,8 @@ } ], "source": [ - "text = 
load_text_file('paul_graham/paul_graham_essay.txt')\n", - "file_metadata = get_local_file_metadata('paul_graham/paul_graham_essay.txt')\n", + "text = load_text_file(\"paul_graham/paul_graham_essay.txt\")\n", + "file_metadata = get_local_file_metadata(\"paul_graham/paul_graham_essay.txt\")\n", "print(text[:1000])\n", "print(file_metadata)" ] @@ -839,9 +851,12 @@ "from adalflow.core.types import Document\n", "\n", "# sentence splitting is confusing, the length needs to be smaller\n", - "metadata = {\"title\": \"Paul Graham's essay\", \"path\": \"data/paul_graham/paul_graham_essay.txt\"}\n", + "metadata = {\n", + " \"title\": \"Paul Graham's essay\",\n", + " \"path\": \"data/paul_graham/paul_graham_essay.txt\",\n", + "}\n", "metadata.update(file_metadata)\n", - "documents = [Document(text = text, meta_data = metadata)]\n", + "documents = [Document(text=text, meta_data=metadata)]\n", "splitter = DocumentSplitter(split_by=\"word\", split_length=800, split_overlap=200)\n", "\n", "print(documents)\n", @@ -925,7 +940,7 @@ ], "source": [ "# split the document\n", - "splitted_documents = splitter(documents = documents)\n", + "splitted_documents = splitter(documents=documents)\n", "print(splitted_documents[0], len(splitted_documents))" ] }, @@ -1303,15 +1318,20 @@ "\n", "# 1. 
set up the tracing for failed call as the retriever has generator attribute\n", "\n", + "\n", "@trace_generator_call(save_dir=\"tutorials/traces\")\n", "class LoggedLLMRetriever(LLMRetriever):\n", " pass\n", + "\n", + "\n", "top_k = 2\n", "retriever = LoggedLLMRetriever(\n", - " top_k = top_k, model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-3.5-turbo\"}\n", + " top_k=top_k, model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-3.5-turbo\"}\n", ")\n", "\n", - "retriever.build_index_from_documents(documents=[doc.text for doc in splitted_documents[0:16]])\n", + "retriever.build_index_from_documents(\n", + " documents=[doc.text for doc in splitted_documents[0:16]]\n", + ")\n", "\n", "print(retriever)\n", "retriever.generator.print_prompt()" @@ -1373,7 +1393,9 @@ "source": [ "# output[0].documents = [splitted_documents[idx] for idx in output[0].doc_indices]\n", "for per_query_output in output:\n", - " per_query_output.documents = [splitted_documents[idx] for idx in per_query_output.doc_indices]\n", + " per_query_output.documents = [\n", + " splitted_documents[idx] for idx in per_query_output.doc_indices\n", + " ]\n", "print(\"output.documents\", output[0].documents)\n", "len(output)" ] @@ -1537,51 +1559,51 @@ "source": [ "# create data transformer\n", "data_transformer_config = { # attribute and its config to recreate the component\n", - " \"embedder\":{\n", - " \"component_name\": \"Embedder\",\n", - " \"component_config\": {\n", - " \"model_client\": {\n", - " \"component_name\": \"OpenAIClient\",\n", - " \"component_config\": {},\n", - " },\n", - " \"model_kwargs\": {\n", - " \"model\": \"text-embedding-3-small\",\n", - " \"dimensions\": 256,\n", - " \"encoding_format\": \"float\",\n", - " },\n", + " \"embedder\": {\n", + " \"component_name\": \"Embedder\",\n", + " \"component_config\": {\n", + " \"model_client\": {\n", + " \"component_name\": \"OpenAIClient\",\n", + " \"component_config\": {},\n", " },\n", - " },\n", - " \"document_splitter\": 
{\n", - " \"component_name\": \"DocumentSplitter\",\n", - " \"component_config\": {\n", - " \"split_by\": \"word\",\n", - " \"split_length\": 400,\n", - " \"split_overlap\": 200,\n", + " \"model_kwargs\": {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 256,\n", + " \"encoding_format\": \"float\",\n", " },\n", " },\n", - " \"to_embeddings\": {\n", - " \"component_name\": \"ToEmbeddings\",\n", - " \"component_config\": {\n", - " \"vectorizer\": {\n", - " \"component_name\": \"Embedder\",\n", - " \"component_config\": {\n", - " \"model_client\": {\n", - " \"component_name\": \"OpenAIClient\",\n", - " \"component_config\": {},\n", - " },\n", - " \"model_kwargs\": {\n", - " \"model\": \"text-embedding-3-small\",\n", - " \"dimensions\": 256,\n", - " \"encoding_format\": \"float\",\n", - " },\n", + " },\n", + " \"document_splitter\": {\n", + " \"component_name\": \"DocumentSplitter\",\n", + " \"component_config\": {\n", + " \"split_by\": \"word\",\n", + " \"split_length\": 400,\n", + " \"split_overlap\": 200,\n", + " },\n", + " },\n", + " \"to_embeddings\": {\n", + " \"component_name\": \"ToEmbeddings\",\n", + " \"component_config\": {\n", + " \"vectorizer\": {\n", + " \"component_name\": \"Embedder\",\n", + " \"component_config\": {\n", + " \"model_client\": {\n", + " \"component_name\": \"OpenAIClient\",\n", + " \"component_config\": {},\n", + " },\n", + " \"model_kwargs\": {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 256,\n", + " \"encoding_format\": \"float\",\n", " },\n", - " # the other config is to instantiate the entity (class and function) with the given config as arguments\n", - " # \"entity_state\": \"storage/embedder.pkl\", # this will load back the state of the entity\n", " },\n", - " \"batch_size\": 100,\n", + " # the other config is to instantiate the entity (class and function) with the given config as arguments\n", + " # \"entity_state\": \"storage/embedder.pkl\", # this will load back the state of the 
entity\n", " },\n", + " \"batch_size\": 100,\n", " },\n", - " }" + " },\n", + "}" ] }, { @@ -1653,7 +1675,9 @@ "source": [ "from adalflow.core.component import Sequential\n", "\n", - "data_transformer = Sequential(components[\"document_splitter\"], components[\"to_embeddings\"])\n", + "data_transformer = Sequential(\n", + " components[\"document_splitter\"], components[\"to_embeddings\"]\n", + ")\n", "data_transformer" ] }, @@ -1861,7 +1885,7 @@ "source": [ "# test using only the document splitter\n", "text_split = components[\"document_splitter\"](documents)\n", - "print(text_split)\n" + "print(text_split)" ] }, { @@ -2132,7 +2156,7 @@ } ], "source": [ - "# check the length of all documents,text \n", + "# check the length of all documents,text\n", "lengths = set([doc.estimated_num_tokens for doc in documents])\n", "print(lengths)" ] @@ -2155,7 +2179,7 @@ "for doc in documents:\n", " if len(doc.vector) != 256:\n", " print(doc)\n", - " total+=1\n", + " total += 1\n", "print(total)" ] }, @@ -2334,9 +2358,9 @@ } ], "source": [ - "len_documents=len(restored_db.documents)\n", + "len_documents = len(restored_db.documents)\n", "keys = list(restored_db.transformed_documents.keys())\n", - "len_transformed_documents=len(restored_db.transformed_documents[keys[0]])\n", + "len_transformed_documents = len(restored_db.transformed_documents[keys[0]])\n", "print(len_documents, len_transformed_documents, keys)" ] }, @@ -2367,7 +2391,7 @@ ], "source": [ "# lets' print out part of the vector\n", - "restored_db.transformed_documents[keys[0]][0].vector[0:10]\n" + "restored_db.transformed_documents[keys[0]][0].vector[0:10]" ] }, { @@ -2397,11 +2421,9 @@ } ], "source": [ - "\n", "from adalflow.components.retriever import FAISSRetriever\n", "\n", "\n", - "\n", "retriever = FAISSRetriever(embedder=components[\"embedder\"], top_k=5)\n", "print(retriever)" ] @@ -2447,6 +2469,7 @@ "source": [ "# convert vectors to numpy array\n", "import numpy as np\n", + "\n", "vectors_np = np.array(vectors, 
dtype=np.float32)" ] }, @@ -2521,7 +2544,9 @@ "source": [ "# get initial documents\n", "for per_query_output in output:\n", - " per_query_output.documents = [documents[idx] for idx in per_query_output.doc_indices]\n", + " per_query_output.documents = [\n", + " documents[idx] for idx in per_query_output.doc_indices\n", + " ]\n", "\n", "output" ] @@ -2591,7 +2616,9 @@ "outputs": [], "source": [ "retriever = BM25Retriever(top_k=1)\n", - "retriever.build_index_from_documents([\"hello world\", \"world is beautiful\", \"today is a good day\"])\n", + "retriever.build_index_from_documents(\n", + " [\"hello world\", \"world is beautiful\", \"today is a good day\"]\n", + ")\n", "output = retriever.retrieve(\"hello\")\n", "output" ] diff --git a/tutorials/tools.ipynb b/tutorials/tools.ipynb index c32b9420..092ef764 100644 --- a/tutorials/tools.ipynb +++ b/tutorials/tools.ipynb @@ -20,6 +20,7 @@ "\n", "client = OpenAI()\n", "\n", + "\n", "# Example dummy function hard coded to return the same weather\n", "# In production, this could be your backend API or an external API\n", "def get_current_weather(location, unit=\"fahrenheit\"):\n", @@ -27,15 +28,23 @@ " if \"tokyo\" in location.lower():\n", " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", " elif \"san francisco\" in location.lower():\n", - " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit})\n", + " return json.dumps(\n", + " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", + " )\n", " elif \"paris\" in location.lower():\n", " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", " else:\n", " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", "\n", + "\n", "def run_conversation():\n", " # Step 1: send the conversation and available functions to the model\n", - " messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San 
Francisco, Tokyo, and Paris?\"}]\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\",\n", + " }\n", + " ]\n", " tools = [\n", " {\n", " \"type\": \"function\",\n", @@ -95,6 +104,8 @@ " messages=messages,\n", " ) # get a new response from the model where it can see the function response\n", " return second_response\n", + "\n", + "\n", "print(run_conversation())" ] }, @@ -110,16 +121,19 @@ "import time\n", "import asyncio\n", "\n", + "\n", "def multiply(a: int, b: int) -> int:\n", " \"\"\"Multiply two numbers.\"\"\"\n", " time.sleep(1)\n", " return a * b\n", "\n", + "\n", "def add(a: int, b: int) -> int:\n", " \"\"\"Add two numbers.\"\"\"\n", " time.sleep(1)\n", " return a + b\n", "\n", + "\n", "async def divide(a: float, b: float) -> float:\n", " \"\"\"Divide two numbers.\"\"\"\n", " await asyncio.sleep(1)\n", @@ -136,15 +150,20 @@ " \"\"\"Sum the elements of an array.\"\"\"\n", " return np.sum(arr)\n", "\n", + "\n", "x = 2\n", + "\n", + "\n", "@dataclass\n", "class Point:\n", " x: int\n", " y: int\n", "\n", + "\n", "def add_points(p1: Point, p2: Point) -> Point:\n", " return Point(p1.x + p2.x, p1.y + p2.y)\n", "\n", + "\n", "all_functions = [multiply, add, divide, search, numpy_sum, add_points]\n", "\n", "all_functions_dict = {f.__name__: f for f in all_functions}" @@ -173,10 +192,8 @@ "\n", "from adalflow.core.func_tool import FunctionTool\n", "\n", - "functions =[multiply, add, divide, search, numpy_sum, add_points]\n", - "tools = [\n", - " FunctionTool(fn=fn) for fn in functions\n", - "]\n", + "functions = [multiply, add, divide, search, numpy_sum, add_points]\n", + "tools = [FunctionTool(fn=fn) for fn in functions]\n", "for tool in tools:\n", " print(tool)" ] @@ -188,7 +205,7 @@ "outputs": [], "source": [ "# create a context map\n", - "context_map = {tool.definition.func_name: tool for tool in tools}\n" + "context_map = {tool.definition.func_name: tool for tool in 
tools}" ] }, { @@ -295,7 +312,7 @@ } ], "source": [ - "# execute get_current_weather using function call \n", + "# execute get_current_weather using function call\n", "\n", "ft.call(**{\"location\": \"San Francisco\", \"unit\": \"celsius\"})" ] @@ -344,8 +361,7 @@ "print(tools[2].execute(**{\"a\": 10, \"b\": 2}))\n", "\n", "display(await tools[2].acall(**{\"a\": 10, \"b\": 2}))\n", - "display(await tools[2].execute(**{\"a\": 10, \"b\": 2}))\n", - "\n" + "display(await tools[2].execute(**{\"a\": 10, \"b\": 2}))" ] }, { @@ -442,34 +458,38 @@ } ], "source": [ - "# call all the above functions \n", + "# call all the above functions\n", "import nest_asyncio\n", "\n", "nest_asyncio.apply()\n", "\n", "\n", - "\n", "async def async_function_1():\n", " await asyncio.sleep(1)\n", " return \"Function 1 completed\"\n", "\n", + "\n", "def sync_function_1():\n", " time.sleep(1)\n", " return \"Function 1 completed\"\n", "\n", + "\n", "async def async_function_2():\n", " await asyncio.sleep(2)\n", " return \"Function 2 completed\"\n", "\n", + "\n", "def sync_function_2():\n", " time.sleep(2)\n", " return \"Function 2 completed\"\n", "\n", + "\n", "async_tool_1 = FunctionTool(async_function_1)\n", "sync_tool_1 = FunctionTool(sync_function_2)\n", "async_tool_2 = FunctionTool(async_function_2)\n", "sync_tool_2 = FunctionTool(sync_function_2)\n", "\n", + "\n", "def run_sync_and_async_mix_without_wait():\n", " # both sync and async tool can use execute\n", " # sync tool can also use call\n", @@ -484,6 +504,7 @@ " print(f\"run_sync_and_async_mix_without_wait time: {end_time - start_time}\")\n", " return results\n", "\n", + "\n", "async def run_sync_and_async_mix():\n", " # both sync and async tool can use execute&to_thread\n", " # async tool can also use acall without to_thread\n", @@ -492,13 +513,13 @@ " results = await asyncio.gather(\n", " async_tool_1.execute(),\n", " sync_tool_1.execute(),\n", - " \n", " async_tool_2.acall(),\n", " )\n", " end_time = time.time()\n", " 
print(f\"run_sync_and_async_mix time: {end_time - start_time}\")\n", " return results\n", "\n", + "\n", "# Execute functions\n", "results_without_wait = run_sync_and_async_mix_without_wait()\n", "display(results_without_wait)\n", @@ -675,7 +696,7 @@ "small_tool_manager = ToolManager(tools=tools[:2])\n", "\n", "renered_prompt = prompt(tools=tool_manager.yaml_definitions)\n", - "print(renered_prompt)\n" + "print(renered_prompt)" ] }, { @@ -703,16 +724,16 @@ } ], "source": [ - "# let's render the output format using Function class \n", + "# let's render the output format using Function class\n", "\n", "from adalflow.core.types import Function\n", "\n", "\n", - "output_data_class = Function \n", + "output_data_class = Function\n", "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\"])\n", "\n", - "renered_prompt= prompt(output_format_str=output_format_str)\n", - "print(renered_prompt)\n" + "renered_prompt = prompt(output_format_str=output_format_str)\n", + "print(renered_prompt)" ] }, { @@ -776,7 +797,7 @@ "\n", "func_parser = JsonOutputParser(data_class=Function)\n", "instructions = func_parser.format_instructions(exclude=[\"thought\"])\n", - "print(instructions)\n" + "print(instructions)" ] }, { @@ -844,9 +865,7 @@ "model_kwargs = {\"model\": \"gpt-3.5-turbo\"}\n", "prompt_kwargs = {\n", " \"tools\": tool_manager.yaml_definitions,\n", - " \"output_format_str\": func_parser.format_instructions(\n", - " exclude=[\"thought\", \"args\"]\n", - " ),\n", + " \"output_format_str\": func_parser.format_instructions(exclude=[\"thought\", \"args\"]),\n", "}\n", "generator = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", @@ -887,14 +906,14 @@ "source": [ "# call queries\n", "queries = [\n", - " \"add 2 and 3\",\n", - " \"search for something\",\n", - " \"add points (1, 2) and (3, 4)\",\n", - " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", - " \"multiply 2 with local variable x\",\n", - " \"divide 2 by 3\",\n", - " \"Add 5 to 
variable y\",\n", - " ]" + " \"add 2 and 3\",\n", + " \"search for something\",\n", + " \"add points (1, 2) and (3, 4)\",\n", + " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", + " \"multiply 2 with local variable x\",\n", + " \"divide 2 by 3\",\n", + " \"Add 5 to variable y\",\n", + "]" ] }, { @@ -1046,7 +1065,6 @@ } ], "source": [ - "\n", "for idx, query in enumerate(queries):\n", " prompt_kwargs = {\"input_str\": query}\n", " print(f\"\\n{idx} Query: {query}\")\n", @@ -1056,10 +1074,12 @@ " # print(f\"LLM raw output: {result.raw_response}\")\n", " func = Function.from_dict(result.data)\n", " print(f\"Function: {func}\")\n", - " func_output= tool_manager.execute_func(func)\n", + " func_output = tool_manager.execute_func(func)\n", " display(f\"Function output: {func_output}\")\n", " except Exception as e:\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")" + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" ] }, { @@ -1100,7 +1120,7 @@ } ], "source": [ - "# let's use FunctionExpression to call the function instead \n", + "# let's use FunctionExpression to call the function instead\n", "\n", "from adalflow.core.types import FunctionExpression\n", "\n", @@ -1109,7 +1129,9 @@ "print(output_format_str)\n", "\n", "# lets' add one example to be more robust that they should call it with function call expression\n", - "example = FunctionExpression.from_function(thought=None, func=add_points, **{\"p1\": Point(1, 2), \"p2\": Point(3, 4)})\n", + "example = FunctionExpression.from_function(\n", + " thought=None, func=add_points, **{\"p1\": Point(1, 2), \"p2\": Point(3, 4)}\n", + ")\n", "print(example)" ] }, @@ -1258,15 +1280,15 @@ "instructions = parser.format_instructions(exclude=[\"thought\"])\n", "\n", "prompt_kwargs = {\n", - " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", - " \"output_format_str\": 
parser.format_instructions(exclude=[\"thought\"]),\n", - " }\n", + " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", + " \"output_format_str\": parser.format_instructions(exclude=[\"thought\"]),\n", + "}\n", "generator = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", " model_kwargs=model_kwargs,\n", " template=template,\n", " prompt_kwargs=prompt_kwargs,\n", - " output_processors=parser\n", + " output_processors=parser,\n", ")\n", "\n", "generator.print_prompt(**prompt_kwargs)" @@ -1291,67 +1313,65 @@ "\n", "# Define a list of safe built-ins\n", "SAFE_BUILTINS = {\n", - " 'abs': abs,\n", - " 'all': all,\n", - " 'any': any,\n", - " 'bin': bin,\n", - " 'bool': bool,\n", - " 'bytearray': bytearray,\n", - " 'bytes': bytes,\n", - " 'callable': callable,\n", - " 'chr': chr,\n", - " 'complex': complex,\n", - " 'dict': dict,\n", - " 'divmod': divmod,\n", - " 'enumerate': enumerate,\n", - " 'filter': filter,\n", - " 'float': float,\n", - " 'format': format,\n", - " 'frozenset': frozenset,\n", - " 'getattr': getattr,\n", - " 'hasattr': hasattr,\n", - " 'hash': hash,\n", - " 'hex': hex,\n", - " 'int': int,\n", - " 'isinstance': isinstance,\n", - " 'issubclass': issubclass,\n", - " 'iter': iter,\n", - " 'len': len,\n", - " 'list': list,\n", - " 'map': map,\n", - " 'max': max,\n", - " 'min': min,\n", - " 'next': next,\n", - " 'object': object,\n", - " 'oct': oct,\n", - " 'ord': ord,\n", - " 'pow': pow,\n", - " 'range': range,\n", - " 'repr': repr,\n", - " 'reversed': reversed,\n", - " 'round': round,\n", - " 'set': set,\n", - " 'slice': slice,\n", - " 'sorted': sorted,\n", - " 'str': str,\n", - " 'sum': sum,\n", - " 'tuple': tuple,\n", - " 'type': type,\n", - " 'zip': zip,\n", + " \"abs\": abs,\n", + " \"all\": all,\n", + " \"any\": any,\n", + " \"bin\": bin,\n", + " \"bool\": bool,\n", + " \"bytearray\": bytearray,\n", + " \"bytes\": bytes,\n", + " \"callable\": callable,\n", + " \"chr\": chr,\n", + " \"complex\": complex,\n", + " \"dict\": dict,\n", 
+ " \"divmod\": divmod,\n", + " \"enumerate\": enumerate,\n", + " \"filter\": filter,\n", + " \"float\": float,\n", + " \"format\": format,\n", + " \"frozenset\": frozenset,\n", + " \"getattr\": getattr,\n", + " \"hasattr\": hasattr,\n", + " \"hash\": hash,\n", + " \"hex\": hex,\n", + " \"int\": int,\n", + " \"isinstance\": isinstance,\n", + " \"issubclass\": issubclass,\n", + " \"iter\": iter,\n", + " \"len\": len,\n", + " \"list\": list,\n", + " \"map\": map,\n", + " \"max\": max,\n", + " \"min\": min,\n", + " \"next\": next,\n", + " \"object\": object,\n", + " \"oct\": oct,\n", + " \"ord\": ord,\n", + " \"pow\": pow,\n", + " \"range\": range,\n", + " \"repr\": repr,\n", + " \"reversed\": reversed,\n", + " \"round\": round,\n", + " \"set\": set,\n", + " \"slice\": slice,\n", + " \"sorted\": sorted,\n", + " \"str\": str,\n", + " \"sum\": sum,\n", + " \"tuple\": tuple,\n", + " \"type\": type,\n", + " \"zip\": zip,\n", "}\n", "\n", + "\n", "# Define a context manager to limit execution time\n", "# Create a sandbox execution function\n", "def sandbox_exec(code, context=SAFE_BUILTINS, timeout=5):\n", "\n", " try:\n", - " compiled_code = compile(code, '', 'exec')\n", + " compiled_code = compile(code, \"\", \"exec\")\n", "\n", " # Result dictionary to store execution results\n", - " result = {\n", - " \"output\" : None,\n", - " \"error\" : None\n", - " }\n", + " result = {\"output\": None, \"error\": None}\n", "\n", " # Define a target function for the thread\n", " def target():\n", @@ -1360,7 +1380,6 @@ " exec(compiled_code, context, result)\n", " except Exception as e:\n", " result[\"error\"] = e\n", - " \n", "\n", " # Create a thread to execute the code\n", " thread = threading.Thread(target=target)\n", @@ -1377,6 +1396,7 @@ "\n", " return result\n", "\n", + "\n", "# Example usage\n", "code = \"\"\"\n", "def add(a, b+5):\n", @@ -1391,7 +1411,7 @@ "except TimeoutError as e:\n", " print(e)\n", "except Exception as e:\n", - " print(\"Sandbox error:\", e)\n" + " 
print(\"Sandbox error:\", e)" ] }, { @@ -1510,23 +1530,23 @@ } ], "source": [ - "# run the generator but we will use FunctionTool.parse_function_call_expr and have a context map \n", + "# run the generator but we will use FunctionTool.parse_function_call_expr and have a context map\n", "\n", "all_functions_dict.update(\n", " {\n", - " \"Point\": Point,\n", - " # support numpy\n", - " \"np\": np,\n", - " \"np.ndarray\": np.ndarray,\n", - " \"array\": np.array,\n", - " \"arr\": arr,\n", - " \"np.array\": np.array,\n", - " \"x\": x\n", + " \"Point\": Point,\n", + " # support numpy\n", + " \"np\": np,\n", + " \"np.ndarray\": np.ndarray,\n", + " \"array\": np.array,\n", + " \"arr\": arr,\n", + " \"np.array\": np.array,\n", + " \"x\": x,\n", " }\n", ")\n", - "y=4\n", + "y = 4\n", "print(all_functions_dict)\n", - "for query in queries+[\"Add 5 to variable y\"]:\n", + "for query in queries + [\"Add 5 to variable y\"]:\n", "\n", " try:\n", " print(f\"Query: {query}\")\n", @@ -1537,10 +1557,14 @@ " func_expr = FunctionExpression.from_dict(result.data)\n", "\n", " print(func_expr)\n", - " assert isinstance(func_expr, FunctionExpression), f\"Expected FunctionExpression, got {type(result.data)}\"\n", + " assert isinstance(\n", + " func_expr, FunctionExpression\n", + " ), f\"Expected FunctionExpression, got {type(result.data)}\"\n", "\n", " # more secure way to handle function call\n", - " func: Function = FunctionTool.parse_function_call_expr(expr=func_expr.action, context_map=all_functions_dict)\n", + " func: Function = FunctionTool.parse_function_call_expr(\n", + " expr=func_expr.action, context_map=all_functions_dict\n", + " )\n", " print(func)\n", " fun_output = all_functions_dict[func.name](*func.args, **func.kwargs)\n", " print(\"func output:\", fun_output)\n", @@ -1558,18 +1582,24 @@ " print(\"sandbox output:\", result)\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", + 
" print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", " try:\n", " fun_output = eval(func_expr.action)\n", " print(\"func output:\", fun_output)\n", "\n", - " #sandbox_exec\n", + " # sandbox_exec\n", " action = \"output=\" + func_expr.action\n", - " result = sandbox_exec(action, context={**SAFE_BUILTINS, **all_functions_dict})\n", + " result = sandbox_exec(\n", + " action, context={**SAFE_BUILTINS, **all_functions_dict}\n", + " )\n", " print(\"sandbox output:\", result)\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")" + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" ] }, { @@ -1776,20 +1806,27 @@ } ], "source": [ - "queries = [\"add 2 and 3\", \"search for something\", \"add points (1, 2) and (3, 4)\", \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\", \"multiply 2 with local variable x\", \"divide 2 by 3\"]\n", + "queries = [\n", + " \"add 2 and 3\",\n", + " \"search for something\",\n", + " \"add points (1, 2) and (3, 4)\",\n", + " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", + " \"multiply 2 with local variable x\",\n", + " \"divide 2 by 3\",\n", + "]\n", "\n", - "from adalflow.core.string_parser import JsonParser # improve a list of json\n", + "from adalflow.core.string_parser import JsonParser # improve a list of json\n", "\n", "preset_prompt_kwargs = {\n", - " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", - " \"output_format_str\": parser.format_instructions(exclude=[\"thought\"])\n", - " }\n", + " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", + " \"output_format_str\": parser.format_instructions(exclude=[\"thought\"]),\n", + "}\n", "multi_call_gen = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", " model_kwargs=model_kwargs,\n", " 
template=multple_function_call_template,\n", " prompt_kwargs=preset_prompt_kwargs,\n", - " output_processors=JsonParser()\n", + " output_processors=JsonParser(),\n", ")\n", "print(multi_call_gen)\n", "multi_call_gen.print_prompt()" @@ -1882,8 +1919,12 @@ } ], "source": [ - "def execute_function_by_parsing(func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]) -> Any:\n", - " func: Function = FunctionTool.parse_function_call_expr(expr=func_expr.action, context_map=all_functions_dict)\n", + "def execute_function_by_parsing(\n", + " func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]\n", + ") -> Any:\n", + " func: Function = FunctionTool.parse_function_call_expr(\n", + " expr=func_expr.action, context_map=all_functions_dict\n", + " )\n", " print(func)\n", " fun_output = all_functions_dict[func.name](*func.args, **func.kwargs)\n", " print(\"func output:\", fun_output)\n", @@ -1900,7 +1941,10 @@ " print(\"func output:\", fun_output)\n", " return fun_output\n", "\n", - "def execute_function_by_sandbox(func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]) -> Any:\n", + "\n", + "def execute_function_by_sandbox(\n", + " func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]\n", + ") -> Any:\n", " # sandbox_exec\n", " action = \"output=\" + func_expr.action\n", " result = sandbox_exec(action, context={**SAFE_BUILTINS, **all_functions_dict})\n", @@ -1909,10 +1953,8 @@ " return result\n", "\n", "\n", - "\n", - "\n", "for i in range(0, len(queries), 2):\n", - " query = \" and \".join(queries[i:i+2])\n", + " query = \" and \".join(queries[i : i + 2])\n", " print(f\"Query: {query}\\n_________________________\\n\")\n", " prompt_kwargs = {\"input_str\": query}\n", " result = multi_call_gen(prompt_kwargs=prompt_kwargs)\n", @@ -1925,32 +1967,46 @@ " print(func_exprs)\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to parse the function for query: {query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " 
f\"Failed to parse the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", " continue\n", " try:\n", - " func_outputs_1 = [execute_function_by_parsing(func_expr, all_functions_dict) for func_expr in func_exprs]\n", + " func_outputs_1 = [\n", + " execute_function_by_parsing(func_expr, all_functions_dict)\n", + " for func_expr in func_exprs\n", + " ]\n", " print(f\"fun_output by parsing: {func_outputs_1}\\n_________________________\\n\")\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", "\n", " try:\n", "\n", - " func_outputs_2 = [execute_function_by_eval(func_expr) for func_expr in func_exprs]\n", + " func_outputs_2 = [\n", + " execute_function_by_eval(func_expr) for func_expr in func_exprs\n", + " ]\n", " print(f\"fun_output by eval: {func_outputs_2}\\n_________________________\\n\")\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", "\n", " try:\n", "\n", - " func_outputs_3 = [execute_function_by_sandbox(func_expr, all_functions_dict) for func_expr in func_exprs]\n", + " func_outputs_3 = [\n", + " execute_function_by_sandbox(func_expr, all_functions_dict)\n", + " for func_expr in func_exprs\n", + " ]\n", " print(f\"fun_output by sandbox: {func_outputs_3}\\n_________________________\\n\")\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", - "\n", - " \n" + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" ] }, { @@ -1975,6 +2031,7 @@ "\n", 
"client = OpenAI()\n", "\n", + "\n", "# Example dummy function hard coded to return the same weather\n", "# In production, this could be your backend API or an external API\n", "def get_current_weather(location, unit=\"fahrenheit\"):\n", @@ -1982,15 +2039,23 @@ " if \"tokyo\" in location.lower():\n", " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", " elif \"san francisco\" in location.lower():\n", - " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit})\n", + " return json.dumps(\n", + " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", + " )\n", " elif \"paris\" in location.lower():\n", " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", " else:\n", " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", "\n", + "\n", "def run_conversation():\n", " # Step 1: send the conversation and available functions to the model\n", - " messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris in celsius?\"}]\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris in celsius?\",\n", + " }\n", + " ]\n", " tools = [\n", " {\n", " \"type\": \"function\",\n", @@ -2034,11 +2099,13 @@ " for tool_call in tool_calls:\n", " function_name = tool_call.function.name\n", " function_to_call = available_functions[function_name]\n", - " function_args = json.loads(tool_call.function.arguments)# use json.loads to convert a string to a dictionary\n", + " function_args = json.loads(\n", + " tool_call.function.arguments\n", + " ) # use json.loads to convert a string to a dictionary\n", " # function_response = function_to_call(\n", " # location=function_args.get(\"location\"),\n", " # unit=function_args.get(\"unit\"),\n", - " # ) \n", + " # )\n", " # you have to exactly know the arguments, 
this does not make sense. How would i know its arguments. **function_args (makes more sense)\n", " function_response = function_to_call(**function_args)\n", " messages.append(\n", @@ -2054,6 +2121,8 @@ " messages=messages,\n", " ) # get a new response from the model where it can see the function response\n", " return second_response\n", + "\n", + "\n", "print(run_conversation())" ] }, @@ -2109,18 +2178,17 @@ "outputs": [], "source": [ "def get_current_weather(location: str, unit: str = \"fahrenheit\"):\n", - " \"\"\"Get the current weather in a given location\"\"\"\n", - " if \"tokyo\" in location.lower():\n", - " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", - " elif \"san francisco\" in location.lower():\n", - " return json.dumps(\n", - " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", - " )\n", - " elif \"paris\" in location.lower():\n", - " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", - " else:\n", - " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", - "\n" + " \"\"\"Get the current weather in a given location\"\"\"\n", + " if \"tokyo\" in location.lower():\n", + " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", + " elif \"san francisco\" in location.lower():\n", + " return json.dumps(\n", + " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", + " )\n", + " elif \"paris\" in location.lower():\n", + " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", + " else:\n", + " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})" ] }, { @@ -2134,21 +2202,29 @@ "from adalflow.core.base_data_class import DataClass\n", "from dataclasses import dataclass, field\n", "\n", + "\n", "@dataclass\n", "class Weather(DataClass):\n", - " location: str = field(metadata={\"description\": \"The city and state, 
e.g. San Francisco, CA\"})\n", + " location: str = field(\n", + " metadata={\"description\": \"The city and state, e.g. San Francisco, CA\"}\n", + " )\n", " unit: str = field(metadata={\"enum\": [\"celsius\", \"fahrenheit\"]})\n", "\n", + "\n", "def get_current_weather_2(weather: Weather):\n", " \"\"\"Get the current weather in a given location\"\"\"\n", " if \"tokyo\" in weather.location.lower():\n", - " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": weather.unit})\n", + " return json.dumps(\n", + " {\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": weather.unit}\n", + " )\n", " elif \"san francisco\" in weather.location.lower():\n", " return json.dumps(\n", " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": weather.unit}\n", " )\n", " elif \"paris\" in weather.location.lower():\n", - " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": weather.unit})\n", + " return json.dumps(\n", + " {\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": weather.unit}\n", + " )\n", " else:\n", " return json.dumps({\"location\": weather.location, \"temperature\": \"unknown\"})" ] @@ -2211,8 +2287,7 @@ "\n", "tool_2 = FunctionTool.from_defaults(fn=get_current_weather_2)\n", "\n", - "print(tool_2.metadata.to_json())\n", - "\n" + "print(tool_2.metadata.to_json())" ] }, { @@ -2229,38 +2304,23 @@ "metadata": {}, "outputs": [], "source": [ - "adalflow_fn_schema ={\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"weather\": {\n", - " \"type\": \"Weather\",\n", - " \"desc\": \"The city and state, e.g. 
San Francisco, CA\",\n", - " \"enum\": [\n", - " \"celsius\",\n", - " \"fahrenheit\"\n", - " ]\n", - " }\n", - " },\n", - " \"required\": [\n", - " \"weather\"\n", - " ],\n", - " \"definitions\": {\n", - " \"weather\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"location\": {\n", - " \"type\": \"str\"\n", - " },\n", - " \"unit\": {\n", - " \"type\": \"str\"\n", - " }\n", - " },\n", - " \"required\": [\n", - " \"location\",\n", - " \"unit\"\n", - " ]\n", - " }\n", + "adalflow_fn_schema = {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"weather\": {\n", + " \"type\": \"Weather\",\n", + " \"desc\": \"The city and state, e.g. San Francisco, CA\",\n", + " \"enum\": [\"celsius\", \"fahrenheit\"],\n", + " }\n", + " },\n", + " \"required\": [\"weather\"],\n", + " \"definitions\": {\n", + " \"weather\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\"location\": {\"type\": \"str\"}, \"unit\": {\"type\": \"str\"}},\n", + " \"required\": [\"location\", \"unit\"],\n", " }\n", + " },\n", "}" ] }, @@ -2333,7 +2393,7 @@ } ], "source": [ - "# prepare function tool \n", + "# prepare function tool\n", "weather_tool = FunctionTool.from_defaults(fn=_get_current_weather)\n", "print(weather_tool)" ] @@ -2395,7 +2455,7 @@ } ], "source": [ - "# prepare a minimal function calling template \n", + "# prepare a minimal function calling template\n", "template = r\"\"\"You have these tools available:\n", " \n", " {% for tool in tools %}\n", @@ -2434,11 +2494,13 @@ "\n", "model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.3, \"stream\": False}\n", "\n", + "\n", "@dataclass\n", "class Function(DataClass):\n", " name: str = field(metadata={\"desc\": \"The name of the function\"})\n", " args: Dict[str, Any] = field(metadata={\"desc\": \"The arguments of the function\"})\n", "\n", + "\n", "generator = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", " model_kwargs=model_kwargs,\n", @@ -2535,9 +2597,7 @@ "source": [ "# 
call the function\n", "\n", - "function_map = {\n", - " \"_get_current_weather\": weather_tool\n", - "}\n", + "function_map = {\"_get_current_weather\": weather_tool}\n", "\n", "function_name = structured_output.name\n", "function_args = structured_output.args\n", @@ -2695,22 +2755,30 @@ "from dataclasses import dataclass, field\n", "from typing import Any, Dict\n", "\n", + "\n", "@dataclass\n", "class Address:\n", " street: str\n", " city: str\n", " zipcode: str\n", "\n", + "\n", "@dataclass\n", "class Person:\n", " name: str\n", " age: int\n", " address: Address\n", "\n", + "\n", "# Example instance of the nested dataclasses\n", - "person = Person(name=\"John Doe\", age=30, address=Address(street=\"123 Main St\", city=\"Anytown\", zipcode=\"12345\"))\n", + "person = Person(\n", + " name=\"John Doe\",\n", + " age=30,\n", + " address=Address(street=\"123 Main St\", city=\"Anytown\", zipcode=\"12345\"),\n", + ")\n", "print(person)\n", "\n", + "\n", "def to_dict(obj: Any) -> Dict[str, Any]:\n", " if hasattr(obj, \"__dataclass_fields__\"):\n", " return {key: to_dict(value) for key, value in obj.__dict__.items()}\n", @@ -2721,6 +2789,7 @@ " else:\n", " return obj\n", "\n", + "\n", "# Convert the person instance to a dictionary\n", "person_dict = to_dict(person)\n", "print(person_dict)" @@ -2741,20 +2810,31 @@ ], "source": [ "from typing import List\n", + "\n", + "\n", "@dataclass\n", "class Address:\n", " street: str\n", " city: str\n", " zipcode: str\n", "\n", + "\n", "@dataclass\n", "class Person:\n", " name: str\n", " age: int\n", " addresses: List[Address]\n", "\n", + "\n", "# Example instance of the nested dataclasses\n", - "person = Person(name=\"John Doe\", age=30, addresses=[Address(street=\"123 Main St\", city=\"Anytown\", zipcode=\"12345\"), Address(street=\"456 Elm St\", city=\"Othertown\", zipcode=\"67890\")])\n", + "person = Person(\n", + " name=\"John Doe\",\n", + " age=30,\n", + " addresses=[\n", + " Address(street=\"123 Main St\", city=\"Anytown\", 
zipcode=\"12345\"),\n", + " Address(street=\"456 Elm St\", city=\"Othertown\", zipcode=\"67890\"),\n", + " ],\n", + ")\n", "print(person)" ] }, @@ -2795,6 +2875,8 @@ ], "source": [ "from typing import List, Dict, Optional\n", + "\n", + "\n", "def dataclass_obj_to_dict(\n", " obj: Any, exclude: Optional[Dict[str, List[str]]] = None, parent_key: str = \"\"\n", ") -> Dict[str, Any]:\n", @@ -2851,24 +2933,30 @@ " else:\n", " return obj\n", "\n", + "\n", "from dataclasses import dataclass\n", "from typing import List\n", "\n", + "\n", "@dataclass\n", "class TrecData:\n", " question: str\n", " label: int\n", "\n", + "\n", "@dataclass\n", "class TrecDataList:\n", "\n", " data: List[TrecData]\n", " name: str\n", "\n", + "\n", "trec_data = TrecData(question=\"What is the capital of France?\", label=0)\n", "trec_data_list = TrecDataList(data=[trec_data], name=\"trec_data_list\")\n", "\n", - "dataclass_obj_to_dict(trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]})" + "dataclass_obj_to_dict(\n", + " trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]}\n", + ")" ] }, { @@ -2878,14 +2966,24 @@ "outputs": [], "source": [ "from typing import Type\n", + "\n", + "\n", "def dataclass_obj_from_dict(cls: Type[Any], data: Dict[str, Any]) -> Any:\n", " if hasattr(cls, \"__dataclass_fields__\"):\n", " fieldtypes = {f.name: f.type for f in cls.__dataclass_fields__.values()}\n", - " return cls(**{key: dataclass_obj_from_dict(fieldtypes[key], value) for key, value in data.items()})\n", + " return cls(\n", + " **{\n", + " key: dataclass_obj_from_dict(fieldtypes[key], value)\n", + " for key, value in data.items()\n", + " }\n", + " )\n", " elif isinstance(data, list):\n", " return [dataclass_obj_from_dict(cls.__args__[0], item) for item in data]\n", " elif isinstance(data, dict):\n", - " return {key: dataclass_obj_from_dict(cls.__args__[1], value) for key, value in data.items()}\n", + " return {\n", + " key: 
dataclass_obj_from_dict(cls.__args__[1], value)\n", + " for key, value in data.items()\n", + " }\n", " else:\n", " return data" ] @@ -2933,7 +3031,12 @@ } ], "source": [ - "dataclass_obj_from_dict(TrecDataList, dataclass_obj_to_dict(trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]}))" + "dataclass_obj_from_dict(\n", + " TrecDataList,\n", + " dataclass_obj_to_dict(\n", + " trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]}\n", + " ),\n", + ")" ] } ], diff --git a/use_cases/agent/react_agent.ipynb b/use_cases/agent/react_agent.ipynb index cdc199fe..a93cb89e 100644 --- a/use_cases/agent/react_agent.ipynb +++ b/use_cases/agent/react_agent.ipynb @@ -43,6 +43,7 @@ "source": [ "# load the dataset\n", "from datasets import load_dataset\n", + "\n", "dataset = load_dataset(path=\"hotpot_qa\", name=\"fullwiki\")" ] }, @@ -114,7 +115,6 @@ } ], "source": [ - "\n", "import dotenv\n", "from adalflow.components.model_client import OpenAIClient\n", "from adalflow.components.agent.react_agent import ReActAgent\n", @@ -150,15 +150,17 @@ "import re\n", "import string\n", "\n", + "\n", "# copy code from the paper\n", "def clean_str(p):\n", - " return p.encode().decode(\"unicode-escape\").encode(\"latin1\").decode(\"utf-8\")\n", + " return p.encode().decode(\"unicode-escape\").encode(\"latin1\").decode(\"utf-8\")\n", + "\n", "\n", "# normalization copied from the paper's code\n", "def normalize_answer(s):\n", " def remove_articles(text):\n", " return re.sub(r\"\\b(a|an|the)\\b\", \" \", text)\n", - " \n", + "\n", " def white_space_fix(text):\n", " return \" \".join(text.split())\n", "\n", @@ -171,6 +173,7 @@ "\n", " return white_space_fix(remove_articles(remove_punc(lower(s))))\n", "\n", + "\n", "def search(entity: str) -> str:\n", " \"\"\"\n", " searches the exact entity on Wikipedia and returns the first paragraph if it exists. 
If not, it will return some similar entities to search.\n", @@ -178,29 +181,33 @@ " # Format the entity for URL encoding\n", " entity_formatted = entity.replace(\" \", \"+\")\n", " url = f\"https://en.wikipedia.org/w/index.php?search={entity_formatted}\"\n", - " \n", + "\n", " # Fetch the page\n", " response = requests.get(url)\n", - " soup = BeautifulSoup(response.text, 'html.parser')\n", - " \n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n", + "\n", " # Check if the exact page was found or suggest similar items\n", " # when
is detected, it means the entity page is not found on wikipedia\n", " result_divs = soup.find_all(\"div\", {\"class\": \"mw-search-result-heading\"})\n", - " \n", - " if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n", + "\n", + " if (\n", + " result_divs\n", + " ): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n", " # get Similar results\n", " similar_titles = [div.a.get_text() for div in result_divs]\n", - " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n", + " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n", " else:\n", " # the paper uses page to represent content in

\n", " # Extract xontent\n", - " page_list = [p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")]\n", + " page_list = [\n", + " p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")\n", + " ]\n", " # TODO: Recursive search, if find any concept that needs more search then call search again\n", " # if any(\"may refer to:\" in p for p in page_list):\n", " # search(entity)\n", "\n", " # restructure & clean the page content following the paper's logic\n", - " page = ''\n", + " page = \"\"\n", " for p in page_list:\n", " if len(p.split(\" \")) > 2:\n", " page += clean_str(p)\n", @@ -208,31 +215,39 @@ " page += \"\\n\"\n", " paragraphs = page.split(\"\\n\")\n", " paragraphs = [p.strip() for p in paragraphs if p.strip()]\n", - " \n", + "\n", " sentences = []\n", " for p in paragraphs:\n", - " sentences += p.split('. ')\n", - " sentences = [s.strip() + '.' for s in sentences if s.strip()]\n", - " \n", + " sentences += p.split(\". \")\n", + " sentences = [s.strip() + \".\" for s in sentences if s.strip()]\n", + "\n", " # return the first 5 sentences\n", " if sentences:\n", - " return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)\n", + " return (\n", + " \" \".join(sentences[:5]) if len(sentences) >= 5 else \" \".join(sentences)\n", + " )\n", " else:\n", " return \"No content found on this page.\"\n", - " \n", + "\n", " # TODO: clean the paragraphs and return the searched content\n", "\n", "\n", "def lookup(text: str, keyword: str) -> str:\n", " \"\"\"\n", - " returns the sentences containing keyword in the current passage.\n", + " returns the sentences containing keyword in the current passage.\n", " \"\"\"\n", - " sentences = text.split('.')\n", - " matching_sentences = [sentence.strip() + '.' 
for sentence in sentences if keyword.lower() in sentence.lower()]\n", + " sentences = text.split(\".\")\n", + " matching_sentences = [\n", + " sentence.strip() + \".\"\n", + " for sentence in sentences\n", + " if keyword.lower() in sentence.lower()\n", + " ]\n", " if not matching_sentences:\n", " return \"No sentences found with the keyword.\"\n", " else:\n", - " return ' '.join(matching_sentences) # Join all matching sentences into a single string" + " return \" \".join(\n", + " matching_sentences\n", + " ) # Join all matching sentences into a single string" ] }, { @@ -262,7 +277,7 @@ "outputs": [], "source": [ "examples = [\n", - "\"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n", + " \"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n", "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\n", "Action 1: search(\"Colorado orogeny\")\n", "Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\n", @@ -277,7 +292,7 @@ "Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\n", "Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\n", "Action 5: finish(\"1,800 to 7,000 ft\")\"\"\",\n", - "\"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n", + " \"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n", "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. 
I only need to search Milhouse and find who it is named after.\n", "Action 1: search(\"Milhouse\")\n", "Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\n", @@ -286,7 +301,7 @@ "Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \n", "Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\n", "Action 3: finish(\"Richard Nixon\")\"\"\",\n", - "\"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n", + " \"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n", "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\n", "Action 1: search(\"Adam Clayton Powell\")\n", "Observation 1: Could not find [\"Adam Clayton Powell\"]. Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\n", @@ -296,7 +311,7 @@ "The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\n", "Thought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. 
So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\n", "Action 3: finish(\"The Saimaa Gesture\")\"\"\",\n", - "\"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n", + " \"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n", "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\n", "Action 1: search(\"Nicholas Ray\")\n", "Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 โ€“ June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\n", @@ -305,7 +320,7 @@ "Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\n", "Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\n", "Action 3: finish(\"director, screenwriter, actor\")\"\"\",\n", - "\"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n", + " \"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n", "Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\n", "Action 1: search(\"Arthur's Magazine\")\n", "Observation 1: Arthur's Magazine (1844-ย€ย“1846) was an American literary periodical published in Philadelphia in the 19th century. \n", @@ -314,7 +329,7 @@ "Observation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \n", "Thought 3: First for Women was started in 1989. 
1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\n", "Action 3: finish(\"Arthur's Magazine\")\"\"\",\n", - "\"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n", + " \"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n", "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\n", "Action 1: search(\"Pavel Urysohn\")\n", "Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 รขย€ย“ August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\n", @@ -322,7 +337,7 @@ "Action 2: search(\"Leonid Levin\")\n", "Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \n", "Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \n", - "Action 3: finish(\"yes\")\"\"\"\n", + "Action 3: finish(\"yes\")\"\"\",\n", "]" ] }, @@ -334,7 +349,7 @@ "source": [ "# preset up the examples as prompt_kwargs, the examples will be included in the system prompt\n", "\n", - "preset_prompt_kwargs = {\"examples\": examples} " + "preset_prompt_kwargs = {\"examples\": examples}" ] }, { @@ -353,8 +368,8 @@ "outputs": [], "source": [ "gpt_model_kwargs = {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"temperature\": 0.0,\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"temperature\": 0.0,\n", "}" ] }, @@ -517,8 +532,11 @@ "# max_steps refers to how many thought-action round we allow the model to perform\n", "# to save resources, let's use 3 here\n", "agent = ReActAgent(\n", - " tools=tools, max_steps=3, model_client=OpenAIClient(),\n", - " model_kwargs=gpt_model_kwargs, preset_prompt_kwargs=preset_prompt_kwargs\n", + " tools=tools,\n", + " max_steps=3,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs,\n", + " 
preset_prompt_kwargs=preset_prompt_kwargs,\n", ")\n", "agent" ] @@ -542,7 +560,8 @@ "source": [ "import importlib\n", "import adalflow\n", - "importlib.reload(adalflow)\n" + "\n", + "importlib.reload(adalflow)" ] }, { @@ -681,13 +700,18 @@ "num_questions = 5\n", "for i in range(num_questions):\n", " question = val_dataset[i][\"question\"]\n", - " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", - " \n", + " gt_answer = normalize_answer(\n", + " val_dataset[i][\"answer\"]\n", + " ) # normalize the ground truth answer\n", + "\n", " # get the agent's response\n", " pred_answer = agent(question)\n", " pred_answer = normalize_answer(pred_answer)\n", - " \n", - " printc(f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n" + "\n", + " printc(\n", + " f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n", + " color=\"yellow\",\n", + " )" ] }, { @@ -995,8 +1019,11 @@ "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n", "\n", "agent = ReActAgent(\n", - " tools=tools, max_steps=7, model_client=OpenAIClient(),\n", - " model_kwargs=gpt_model_kwargs, preset_prompt_kwargs=preset_prompt_kwargs\n", + " tools=tools,\n", + " max_steps=7,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs,\n", + " preset_prompt_kwargs=preset_prompt_kwargs,\n", ")\n", "\n", "num_questions = 10\n", @@ -1005,18 +1032,23 @@ "start_time = time.time()\n", "for i in range(num_questions):\n", " question = val_dataset[i][\"question\"]\n", - " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " gt_answer = normalize_answer(\n", + " val_dataset[i][\"answer\"]\n", + " ) # normalize the ground truth answer\n", " gt_answers.append(gt_answer)\n", - " \n", + "\n", " # get the agent's response\n", " pred_answer = agent(question)\n", " pred_answer = normalize_answer(pred_answer)\n", " 
pred_answers.append(pred_answer)\n", - " \n", - " printc(f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n", + "\n", + " printc(\n", + " f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n", + " color=\"yellow\",\n", + " )\n", "\n", "end_time = time.time()\n", - " \n", + "\n", "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "avg_time = (end_time - start_time) / num_questions\n", @@ -1262,8 +1294,7 @@ "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n", "\n", "agent = ReActAgent(\n", - " max_steps=7, model_client=OpenAIClient(),\n", - " model_kwargs=gpt_model_kwargs\n", + " max_steps=7, model_client=OpenAIClient(), model_kwargs=gpt_model_kwargs\n", ")\n", "\n", "num_questions = 10\n", @@ -1272,18 +1303,23 @@ "start_time = time.time()\n", "for i in range(num_questions):\n", " question = val_dataset[i][\"question\"]\n", - " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " gt_answer = normalize_answer(\n", + " val_dataset[i][\"answer\"]\n", + " ) # normalize the ground truth answer\n", " gt_answers.append(gt_answer)\n", - " \n", + "\n", " # get the agent's response\n", " pred_answer = agent(question)\n", " pred_answer = normalize_answer(pred_answer)\n", " pred_answers.append(pred_answer)\n", - " \n", - " printc(f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n", + "\n", + " printc(\n", + " f\"No. 
{i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n", + " color=\"yellow\",\n", + " )\n", "\n", "end_time = time.time()\n", - " \n", + "\n", "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "avg_time = (end_time - start_time) / num_questions\n", diff --git a/use_cases/generator/intermediate.ipynb b/use_cases/generator/intermediate.ipynb index 80f8cff8..fb4dc8a8 100644 --- a/use_cases/generator/intermediate.ipynb +++ b/use_cases/generator/intermediate.ipynb @@ -30,7 +30,10 @@ "from adalflow.core import Component, Generator, Sequential\n", "from adalflow.components.model_client import OpenAIClient\n", "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY\n", + "from adalflow.utils import (\n", + " setup_env,\n", + ") # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY\n", + "\n", "setup_env(\".env\")" ] }, @@ -72,6 +75,7 @@ "source": [ "# Let's turn on the library log to help with debugging.\n", "from adalflow.utils import get_logger\n", + "\n", "get_logger()" ] }, @@ -248,6 +252,8 @@ "# Router component\n", "\n", "from typing import Dict\n", + "\n", + "\n", "class Router(Component):\n", " def __init__(self, choices: Dict[str, str] = {}):\n", " super().__init__()\n", @@ -260,9 +266,9 @@ "\n", " def call(self, query: str) -> str:\n", " prompt_kwargs = {\"input_str\": query, \"choices\": self.choices}\n", - " choice = self.router(prompt_kwargs=prompt_kwargs).data\n", + " choice = self.router(prompt_kwargs=prompt_kwargs).data\n", " return {\"choice\": choice, \"query\": query}\n", - " \n", + "\n", " def _extra_repr(self):\n", " return f\"Choices: {self.choices}, \"" ] @@ -329,6 +335,7 @@ "source": [ "# the second chat component with two generators\n", "\n", + "\n", "class 
Chat(Component):\n", " def __init__(self):\n", " super().__init__()\n", @@ -342,6 +349,7 @@ " model_client=GroqAPIClient(),\n", " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", " )\n", + "\n", " # to chain together just to make sure the output can be directly passed to the next as input\n", " def call(self, input: Dict[str, str]) -> Dict[str, str]:\n", " choice = input.get(\"choice\", None)\n", @@ -412,7 +420,9 @@ "class QAWithRouter(Component):\n", " def __init__(self):\n", " super().__init__()\n", - " self.router = Router(choices={\"doctor\": \"Doctor\", \"lawyer\": \"Lawyer\", \"other\": \"Other\"})\n", + " self.router = Router(\n", + " choices={\"doctor\": \"Doctor\", \"lawyer\": \"Lawyer\", \"other\": \"Other\"}\n", + " )\n", " self.chat = Chat()\n", " self.pipeline = Sequential(self.router, self.chat)\n", "\n", diff --git a/use_cases/question_answering/chatbot.ipynb b/use_cases/question_answering/chatbot.ipynb index 3db858a4..7ed71347 100644 --- a/use_cases/question_answering/chatbot.ipynb +++ b/use_cases/question_answering/chatbot.ipynb @@ -21,6 +21,7 @@ "outputs": [], "source": [ "from IPython.display import clear_output\n", + "\n", "!pip install -U adalflow[openai,groq,faiss-cpu]\n", "clear_output()" ] @@ -37,7 +38,9 @@ "from adalflow.core.component import Component\n", "from adalflow.core.generator import Generator\n", "from adalflow.components.memory.memory import Memory\n", - "from adalflow.components.model_client import OpenAIClient # Here, we use the OpenAIClient as an example, but you can use any other clients (with the corresponding API Key as needed), such as AnthropicAPIClient" + "from adalflow.components.model_client import (\n", + " OpenAIClient,\n", + ") # Here, we use the OpenAIClient as an example, but you can use any other clients (with the corresponding API Key as needed), such as AnthropicAPIClient" ] }, { @@ -49,7 +52,7 @@ "# Prompt user to enter their API keys securely\n", "openai_api_key = getpass(\"Please enter your OpenAI API key: 
\")\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "# Replace with your OpenAI API Key, or you can put it in a .env file" ] }, @@ -64,11 +67,10 @@ " def __init__(self):\n", " super().__init__()\n", " self.generator = Generator(\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={'model': 'gpt-4o-mini'}\n", + " model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-4o-mini\"}\n", " )\n", - " self.chat_history = Memory() # Memory to store the chat history\n", - " \n", + " self.chat_history = Memory() # Memory to store the chat history\n", + "\n", " def call(self) -> str:\n", " print(\"Welcome to the ChatBot. Type anything to chat. Type 'exit' to end.\")\n", " while True:\n", @@ -90,6 +92,7 @@ " )\n", " print(f\"ChatBot: {response}\")\n", "\n", + "\n", "chatbot = ChatBot()\n", "print(chatbot)" ] diff --git a/use_cases/question_answering/simple_qa.ipynb b/use_cases/question_answering/simple_qa.ipynb index 67dc9b04..cec10d76 100644 --- a/use_cases/question_answering/simple_qa.ipynb +++ b/use_cases/question_answering/simple_qa.ipynb @@ -32,7 +32,10 @@ "outputs": [], "source": [ "# Here, we use the OpenAIClient as an example, but you can use any other clients (with the corresponding API Key as needed), such as AnthropicAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY or any other key mentioned with respect to your usage\n", + "from adalflow.utils import (\n", + " setup_env,\n", + ") # make sure you have a .env file with OPENAI_API_KEY or any other key mentioned with respect to your usage\n", + "\n", "setup_env(\".env\")\n", "from adalflow.components.model_client import OpenAIClient" ] @@ -119,12 +122,12 @@ " def __init__(self):\n", " super().__init__()\n", " self.generator = Generator(\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}\n", + " model_client=OpenAIClient(), 
model_kwargs={\"model\": \"gpt-3.5-turbo\"}\n", " )\n", "\n", " def call(self, query: str):\n", - " return self.generator.call(prompt_kwargs={'input_str': query})\n", + " return self.generator.call(prompt_kwargs={\"input_str\": query})\n", + "\n", "\n", "simple_qa = SimpleQA()\n", "print(simple_qa)"