From 896691e1113ec8af9b595c6966f87b2f51b3e55d Mon Sep 17 00:00:00 2001
From: Li Yin
Date: Mon, 21 Oct 2024 12:14:33 +0800
Subject: [PATCH 1/2] Fix the error where a prediction of 0 was treated as
 missing and not passed to the eval function
---
adalflow/adalflow/utils/__init__.py | 8 +
.../source/get_started/adalflow_in_15mins.rst | 2 +-
.../tutorials/lightrag_design_philosophy.rst | 4 +-
docs/source/use_cases/question_answering.rst | 2 +-
.../question_answering_word_sort.rst | 2 +-
...lflow_object_count_auto_optimization.ipynb | 1838 ++++++++---------
.../bbh/object_count/diagnose.py | 4 +-
.../bbh/object_count/train_new.py | 4 +-
.../bbh/word_sorting/diagnose.py | 4 +-
.../bbh/word_sorting/train.py | 4 +-
.../bbh/word_sorting/train_paper.py | 4 +-
11 files changed, 947 insertions(+), 929 deletions(-)
diff --git a/adalflow/adalflow/utils/__init__.py b/adalflow/adalflow/utils/__init__.py
index 1d23aa63..7b4b6012 100644
--- a/adalflow/adalflow/utils/__init__.py
+++ b/adalflow/adalflow/utils/__init__.py
@@ -19,6 +19,9 @@
from .config import new_components_from_config, new_component
from .lazy_import import LazyImport, OptionalPackages, safe_import
from .setup_env import setup_env
+from .data import DataLoader, Dataset, Subset
+from .global_config import get_adalflow_default_root_path
+from .cache import CachedEngine
__all__ = [
@@ -43,4 +46,9 @@
"write_list_to_jsonl",
"safe_import",
"setup_env",
+ "DataLoader",
+ "Dataset",
+ "Subset",
+ "get_adalflow_default_root_path",
+ "CachedEngine",
]
diff --git a/docs/source/get_started/adalflow_in_15mins.rst b/docs/source/get_started/adalflow_in_15mins.rst
index 419f4f71..cf372ff1 100644
--- a/docs/source/get_started/adalflow_in_15mins.rst
+++ b/docs/source/get_started/adalflow_in_15mins.rst
@@ -379,7 +379,7 @@ Here’s the minimum code required to get started on evaluating the task pipelin
self, sample: Example, y_pred: adal.GeneratorOutput
) -> float:
y_label = -1
- if y_pred and y_pred.data:
+ if (y_pred is not None and y_pred.data is not None): # explicit None checks: a truthiness test (`if y_pred and y_pred.data:`) would wrongly skip a valid prediction of 0
y_label = y_pred.data
return self.eval_fn(y=y_label, y_gt=sample.answer)
diff --git a/docs/source/tutorials/lightrag_design_philosophy.rst b/docs/source/tutorials/lightrag_design_philosophy.rst
index 83bb3272..b215b0b1 100644
--- a/docs/source/tutorials/lightrag_design_philosophy.rst
+++ b/docs/source/tutorials/lightrag_design_philosophy.rst
@@ -3,12 +3,12 @@
Design Philosophy
====================================
-Right from the begining, `LightRAG` follows three fundamental principles.
+Right from the beginning, `AdalFlow` follows three fundamental principles.
Principle 1: Simplicity over Complexity
-----------------------------------------------------------------------
- We put these three hard rules while designing LightRAG:
+ We set these three hard rules while designing AdalFlow:
- Every layer of abstraction needs to be adjusted and overall we do not allow more than 3 layers of abstraction.
- We minimize the lines of code instead of maximizing the lines of code.
diff --git a/docs/source/use_cases/question_answering.rst b/docs/source/use_cases/question_answering.rst
index 0b730400..2a95db3d 100644
--- a/docs/source/use_cases/question_answering.rst
+++ b/docs/source/use_cases/question_answering.rst
@@ -396,7 +396,7 @@ Here’s the minimum code required to get started on evaluating the task pipelin
def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:
y_label = -1
- if y_pred and y_pred.data:
+ if (y_pred is not None and y_pred.data is not None): # explicit None checks: a truthiness test (`if y_pred and y_pred.data:`) would wrongly skip a valid prediction of 0
y_label = y_pred.data
return self.eval_fn, {"y": y_label, "y_gt": sample.answer}
diff --git a/docs/source/use_cases/question_answering_word_sort.rst b/docs/source/use_cases/question_answering_word_sort.rst
index a349f6dd..4b57a5b4 100644
--- a/docs/source/use_cases/question_answering_word_sort.rst
+++ b/docs/source/use_cases/question_answering_word_sort.rst
@@ -375,7 +375,7 @@ Here’s the minimum code required to get started on evaluating the task pipelin
self, sample: Example, y_pred: adal.GeneratorOutput
) -> float:
y_label = -1
- if y_pred and y_pred.data:
+ if (y_pred is not None and y_pred.data is not None): # explicit None checks: a truthiness test (`if y_pred and y_pred.data:`) would wrongly skip a valid prediction of 0
y_label = y_pred.data
return self.eval_fn(y=y_label, y_gt=sample.answer)
diff --git a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
index 017363cd..e3f6dce3 100644
--- a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
+++ b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
@@ -1,21 +1,10 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
"cells": [
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "VVSOpjzJl_cx"
+ },
"source": [
"# 🤗 Welcome to AdalFlow!\n",
"## The PyTorch library to auto-optimize any LLM task pipelines\n",
@@ -56,10 +45,7 @@
" pip install adalflow[openai,groq,faiss-cpu]\n",
" ```\n",
"2. Setup `openai` and `groq` API key in the environment variables"
- ],
- "metadata": {
- "id": "VVSOpjzJl_cx"
- }
+ ]
},
{
"cell_type": "code",
@@ -78,11 +64,7 @@
},
{
"cell_type": "code",
- "source": [
- "import adalflow as adal\n",
- "\n",
- "adal.__version__"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -91,25 +73,32 @@
"id": "nJteJKsNrpcu",
"outputId": "d9f7b4d0-d11c-480d-d858-bf9022c18998"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- "'0.2.0'"
- ],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
- }
+ },
+ "text/plain": [
+ "'0.2.0'"
+ ]
},
+ "execution_count": 2,
"metadata": {},
- "execution_count": 2
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "import adalflow as adal\n",
+ "\n",
+ "adal.__version__"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "KapUyHMM07pJ"
+ },
"source": [
"## Set Environment Variables\n",
"\n",
@@ -118,29 +107,11 @@
"Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n",
"\n",
"*Go to [OpenAI](https://platform.openai.com/docs/introduction) and [Groq](https://console.groq.com/docs/) to get API keys if you don't already have.*"
- ],
- "metadata": {
- "id": "KapUyHMM07pJ"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "import os\n",
- "\n",
- "from getpass import getpass\n",
- "\n",
- "# Prompt user to enter their API keys securely\n",
- "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n",
- "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n",
- "\n",
- "\n",
- "# Set environment variables\n",
- "os.environ['OPENAI_API_KEY'] = openai_api_key\n",
- "os.environ['GROQ_API_KEY'] = groq_api_key\n",
- "\n",
- "print(\"API keys have been set.\")"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -148,21 +119,39 @@
"id": "ONfzF9Puzdd_",
"outputId": "6a815e21-ab99-463e-c53b-e39ca2ce8f3f"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Please enter your OpenAI API key: ··········\n",
"Please enter your GROQ API key: ··········\n",
"API keys have been set.\n"
]
}
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "from getpass import getpass\n",
+ "\n",
+ "# Prompt user to enter their API keys securely\n",
+ "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n",
+ "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n",
+ "\n",
+ "\n",
+ "# Set environment variables\n",
+ "os.environ['OPENAI_API_KEY'] = openai_api_key\n",
+ "os.environ['GROQ_API_KEY'] = groq_api_key\n",
+ "\n",
+ "print(\"API keys have been set.\")"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "SfGS7iddtfpj"
+ },
"source": [
"\n",
"\n",
@@ -173,13 +162,15 @@
"Different from our other pipelines where the `prompt_kwargs` values are strings, but here we will use ``Parameter``. And we will set up two parameter, one is of ``ParameterType.PROMPT`` and the other of type ``ParameterType.DEMOS``. The first one will be trained by text-grad and the second will be trained by boostrap few shot optimizer.\n",
"\n",
"\n"
- ],
- "metadata": {
- "id": "SfGS7iddtfpj"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "nHnvAbO-pXUq"
+ },
+ "outputs": [],
"source": [
"import adalflow as adal\n",
"import re\n",
@@ -254,15 +245,13 @@
" return output\n",
"\n",
"\n"
- ],
- "metadata": {
- "id": "nHnvAbO-pXUq"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "AvZJjdzZa0cT"
+ },
"source": [
"Next, we will run this pipeline in both train and eval mode.\n",
"\n",
@@ -270,13 +259,13 @@
"\n",
"Eval mode will output ``GeneratorOutput``.\n",
"\n"
- ],
- "metadata": {
- "id": "AvZJjdzZa0cT"
- }
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "Gks3yS8hcR6_"
+ },
"source": [
"\n",
"#### Train mode with different form of output\n",
@@ -286,13 +275,55 @@
"As the `data` field of the `Parameter` directly communicate with the Optimizer, which are an LLM itself, its better than they understand exactly the string response itself instead of the parsed one.\n",
"\n",
"Later you will see that we also use ``eval_input`` of the parameter to communicate with the `LossFunction` as that need the parsed final output."
- ],
- "metadata": {
- "id": "Gks3yS8hcR6_"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "eqQSFnZOpfWJ",
+ "outputId": "05b5fc83-09d1-45f4-aacc-6d460fbdd7bd"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
+ "ObjectCountTaskPipeline(\n",
+ " (llm_counter): Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ " )\n",
+ ")\n"
+ ]
+ }
+ ],
"source": [
"from adalflow.components.model_client.openai_client import OpenAIClient\n",
"from adalflow.components.model_client.groq_client import GroqAPIClient\n",
@@ -335,59 +366,11 @@
"question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?\"\n",
"task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n",
"print(task_pipeline)\n"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "eqQSFnZOpfWJ",
- "outputId": "05b5fc83-09d1-45f4-aacc-6d460fbdd7bd"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
- "ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- ")\n"
- ]
- }
]
},
{
"cell_type": "code",
- "source": [
- "answer = task_pipeline(question, id=\"1\")\n",
- "print(answer)"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -395,26 +378,23 @@
"id": "DE1xNdYvcXw8",
"outputId": "25844c2a-5d4c-4c68-8ca5-38b79ca5b398"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"GeneratorOutput(id='1', data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n"
]
}
+ ],
+ "source": [
+ "answer = task_pipeline(question, id=\"1\")\n",
+ "print(answer)"
]
},
{
"cell_type": "code",
- "source": [
- "# set it to train mode\n",
- "task_pipeline.train()\n",
- "answer = task_pipeline(question, id=\"1\")\n",
- "print(answer)\n",
- "print(f\"full_response: {answer.full_response}\")"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -422,11 +402,10 @@
"id": "AGUlUsGxcaby",
"outputId": "8c8588fe-2994-4d9e-c2d1-26453141f43f"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Parameter(name=Generator_output, requires_opt=True, param_type=generator_output (The output of the generator.), role_desc=Output from (llm) Generator, data=To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \n",
"\n",
@@ -446,31 +425,43 @@
"full_response: GeneratorOutput(id=None, data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n"
]
}
+ ],
+ "source": [
+ "# set it to train mode\n",
+ "task_pipeline.train()\n",
+ "answer = task_pipeline(question, id=\"1\")\n",
+ "print(answer)\n",
+ "print(f\"full_response: {answer.full_response}\")"
]
},
{
"cell_type": "code",
- "source": [
- "!pip install datasets\n",
- "clear_output()"
- ],
+ "execution_count": null,
"metadata": {
"id": "YDAiuFzcr4YA"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "!pip install datasets\n",
+ "clear_output()"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "### Load Datasets"
- ],
"metadata": {
"id": "-Gvfcy2IcgWx"
- }
+ },
+ "source": [
+ "### Load Datasets"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "AYBIGsIHpjMe"
+ },
+ "outputs": [],
"source": [
"from adalflow.datasets.big_bench_hard import BigBenchHard\n",
"from adalflow.utils.data import subset_dataset\n",
@@ -488,21 +479,11 @@
" test_data = subset_dataset(test_data, max_samples)\n",
"\n",
" return train_data, val_data, test_data\n"
- ],
- "metadata": {
- "id": "AYBIGsIHpjMe"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
- "source": [
- "# check the datasets\n",
- "\n",
- "train_data, val_data, test_data = load_datasets(max_samples=2)\n",
- "print(train_data[0])"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -510,43 +491,51 @@
"id": "asw-pJrid8ly",
"outputId": "31807c34-0de9-45e5-ebdd-778aa5313802"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Example(id='d3f33ded-170a-4b87-9b0b-987d5fb7b817', question='I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?', answer='4')\n"
]
}
+ ],
+ "source": [
+ "# check the datasets\n",
+ "\n",
+ "train_data, val_data, test_data = load_datasets(max_samples=2)\n",
+ "print(train_data[0])"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "VAVtXE9xeEHt"
+ },
"source": [
"### Soft link to AdalFlow default file path\n",
"\n",
"Lets' match the default to the current project, so that you can see the downloaded data and later the checkpoints of the training."
- ],
- "metadata": {
- "id": "VAVtXE9xeEHt"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1SaKH6dkeWus"
+ },
+ "outputs": [],
"source": [
"! ln -s /root/.adalflow /content/adalflow\n",
"\n",
"# go to files then you will see a folder named as adalflow"
- ],
- "metadata": {
- "id": "1SaKH6dkeWus"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "YWZzOvAHenME"
+ },
"source": [
"# 😊 AdalComponent to define everything we need to train\n",
"\n",
@@ -557,26 +546,28 @@
"3. For the demo optimizer, we need a `teacher_model_config` to config a teacher generator, in this case, it is the `llm_counter`. The teacher will share the same prompt with the `llm_counter` but you can use a more advanced model.\n",
"\n",
"In general, we should have all of these parts to use a more advanced model."
- ],
- "metadata": {
- "id": "YWZzOvAHenME"
- }
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "9QoNoMWD0rgV"
+ },
"source": [
"## 🧑 Diagnose\n",
"\n",
"Diagnose is more of an evaluation, but with detailed logs so that you can manually inspect the wrong output.\n",
"\n",
"This one shows the minimum config you need to get the `diagnose` work."
- ],
- "metadata": {
- "id": "9QoNoMWD0rgV"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "6mi7lM3U24Eg"
+ },
+ "outputs": [],
"source": [
"from adalflow.datasets.types import Example\n",
"from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
@@ -595,18 +586,18 @@
" self, sample: Example, y_pred: adal.GeneratorOutput\n",
" ) -> float:\n",
" y_label = -1\n",
- " if y_pred and y_pred.data:\n",
+ " if (y_pred is not None and y_pred.data is not None): # explicit None checks: a truthiness test (`if y_pred and y_pred.data:`) would wrongly skip a valid prediction of 0\n",
" y_label = y_pred.data\n",
" return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}"
- ],
- "metadata": {
- "id": "6mi7lM3U24Eg"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "eliPeVeM2wcP"
+ },
+ "outputs": [],
"source": [
"def diagnose(\n",
" model_client: adal.ModelClient,\n",
@@ -621,18 +612,11 @@
" trainer.diagnose(dataset=trainset, split=\"train\")\n",
" trainer.diagnose(dataset=valset, split=\"val\")\n",
" trainer.diagnose(dataset=testset, split=\"test\")"
- ],
- "metadata": {
- "id": "eliPeVeM2wcP"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
- "source": [
- "diagnose(**gpt_3_model)"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -640,18 +624,17 @@
"id": "nKl9clcb3dFj",
"outputId": "676fbb96-c70b-40ab-ea15-93ade1aa9e66"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
"Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n",
@@ -680,16 +663,16 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 5957.82it/s]\n",
"Evaluating step(0): 0.88 across 50 samples, Max potential: 0.88: 100%|██████████| 50/50 [00:15<00:00, 3.27it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sorted_indices: [8, 16, 23, 25, 31, 47, 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49]\n",
"sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
@@ -720,16 +703,16 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3203.76it/s]\n",
"Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|██████████| 50/50 [00:15<00:00, 3.26it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sorted_indices: [1, 2, 5, 10, 24, 36, 38, 42, 44, 47, 0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 40, 41, 43, 45, 46, 48, 49]\n",
"sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
@@ -760,16 +743,16 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Loading Data: 100%|██████████| 100/100 [00:00<00:00, 5545.09it/s]\n",
"Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|██████████| 100/100 [00:28<00:00, 3.50it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sorted_indices: [7, 18, 19, 20, 23, 24, 25, 43, 58, 59, 63, 74, 75, 79, 85, 97, 99, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 98]\n",
"sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
@@ -778,16 +761,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n"
]
}
+ ],
+ "source": [
+ "diagnose(**gpt_3_model)"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "dSu4VQri3y3D"
+ },
"source": [
"Now, you can go to `/content/adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/stats.json` to view the average score for each split. And also the `diagnose.json` for different errors.\n",
"\n",
@@ -797,33 +786,35 @@
"|:--------- |:--------:| ---------:|\n",
"| 0.88 | 0.8 | 0.83 |\n",
"\n"
- ],
- "metadata": {
- "id": "dSu4VQri3y3D"
- }
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "## 🐛 Debug"
- ],
"metadata": {
"id": "1vzJyp-W0z7I"
- }
+ },
+ "source": [
+ "## 🐛 Debug"
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "TmlCvJu804dJ"
+ },
"source": [
"## ✅ Train\n",
"\n",
"Now, let's start training."
- ],
- "metadata": {
- "id": "TmlCvJu804dJ"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "4TWCn0did6-K"
+ },
+ "outputs": [],
"source": [
"from adalflow.datasets.types import Example\n",
"from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
@@ -873,15 +864,15 @@
" # pred's full_response is the output of the task pipeline which is GeneratorOutput\n",
" pred.eval_input = pred.full_response.data\n",
" return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}"
- ],
- "metadata": {
- "id": "4TWCn0did6-K"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "dezwX2yn1eQS"
+ },
+ "outputs": [],
"source": [
"def train(\n",
" train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle\n",
@@ -925,49 +916,38 @@
" debug=debug,\n",
" resume_from_ckpt=resume_from_ckpt,\n",
" )\n"
- ],
- "metadata": {
- "id": "dezwX2yn1eQS"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "We use `Sequential` in default, we will end up with 24 steps in total, 12 for text optimizer and 12 for the demo optimizer."
- ],
"metadata": {
"id": "NGKYozGt60Pp"
- }
+ },
+ "source": [
+ "We use `Sequential` in default, we will end up with 24 steps in total, 12 for text optimizer and 12 for the demo optimizer."
+ ]
},
{
"cell_type": "code",
- "source": [
- "train(debug=False, max_steps=12, strategy=\"constrained\",\n",
- " raw_shots=0, bootstrap_shots=1,\n",
- " exclude_input_fields_from_bootstrap_demos=True\n",
- " )"
- ],
+ "execution_count": null,
"metadata": {
- "id": "yDwLwL0L7Rsw",
"colab": {
"base_uri": "https://localhost:8080/"
},
+ "id": "yDwLwL0L7Rsw",
"outputId": "1b7e413b-a1d3-4388-fc0c-ca4b1c072585"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
"ObjectCountAdalComponent(\n",
@@ -1087,8 +1067,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 6482.70it/s]\n",
"Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|██████████| 50/50 [00:00<00:00, 347.01it/s]\n",
@@ -1097,8 +1077,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Initial validation score: 0.8\n",
"Initial test score: 0.83\n",
@@ -1107,8 +1087,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 1: 0%| | 0/13 [00:00, ?it/s]\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 133.39it/s]\n",
@@ -1120,16 +1100,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 0 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 384.73it/s]\n",
@@ -1141,16 +1121,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 1 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 193.44it/s]\n",
@@ -1162,16 +1142,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 2 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 234.44it/s]\n",
@@ -1183,16 +1163,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 3 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 133.95it/s]\n",
@@ -1203,8 +1183,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 3\n",
"Moving batch error size: 1\n",
@@ -1224,23 +1204,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1249,15 +1229,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1267,16 +1247,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 1.0 >= 0.75\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1139.66it/s]\n",
@@ -1284,15 +1264,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer step: 0.84 > 0.8\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 100/100 [00:00<00:00, 1658.72it/s]\n",
@@ -1306,8 +1286,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 3\n",
"Moving batch error size: 1\n",
@@ -1327,23 +1307,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1354,16 +1334,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1374,16 +1354,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1394,16 +1374,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1414,16 +1394,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1435,8 +1415,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -1444,8 +1424,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 59.06it/s]\n",
@@ -1456,8 +1436,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 7\n",
"Moving batch error size: 1\n",
@@ -1476,23 +1456,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1501,15 +1481,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1519,16 +1499,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 0.875 >= 0.875\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2336.58it/s]\n",
@@ -1537,15 +1517,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.84 <= 0.84\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 148.75it/s]\n",
@@ -1556,8 +1536,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 11\n",
"Moving batch error size: 1\n",
@@ -1575,23 +1555,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1602,16 +1582,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified in words. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1622,16 +1602,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1642,16 +1622,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1662,16 +1642,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified in words. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1683,8 +1663,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -1692,8 +1672,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 87.73it/s]\n",
@@ -1704,8 +1684,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 14\n",
"Moving batch error size: 2\n",
@@ -1727,23 +1707,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1752,15 +1732,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 0.8333333333333334 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1770,16 +1750,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 0.875 >= 0.875\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1112.82it/s]\n",
@@ -1787,15 +1767,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer step: 0.86 > 0.84\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 100/100 [00:00<00:00, 2395.58it/s]\n",
@@ -1810,16 +1790,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 9 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 93.95it/s]\n",
@@ -1830,8 +1810,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 3\n",
"Moving batch error size: 1\n",
@@ -1851,23 +1831,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1878,16 +1858,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1898,16 +1878,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to categories and quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1916,15 +1896,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -1934,16 +1914,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 1.0 >= 0.75\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1732.93it/s]\n",
@@ -1952,15 +1932,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.8048780487804879 <= 0.86\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 128.86it/s]\n",
@@ -1971,8 +1951,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 6\n",
"Moving batch error size: 2\n",
@@ -1993,23 +1973,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -2020,16 +2000,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -2040,16 +2020,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -2060,16 +2040,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each relevant item, excluding any that do not fit the category. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -2080,16 +2060,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each relevant item, excluding any that do not fit the category. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -2102,8 +2082,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -2293,53 +2273,53 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 13: 0%| | 0/12 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 13\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 158.10it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 6c34d6e5-0e3d-4243-834e-fd6c5883f467 already exists. Updating the trace.Trace with id 234e39df-1bc4-41df-a515-895cb2614a53 already exists. Updating the trace.\n",
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.35it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 8895d6bd-eab0-48af-ad4b-51f8007258b1 already exists. Updating the trace.\n",
"Trace with id c42fea48-1b90-4388-92c4-b65b4356a3a2 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 490.46it/s]\n",
@@ -2353,16 +2333,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['c42fea48-1b90-4388-92c4-b65b4356a3a2']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Trombone: 1\\n\\n 2. Violin: 1\\n\\n 3. Clarinet: 1\\n\\n 4. Accordion: 1\\n\\n 5. Flutes: 4\\n\\n 6. Trumpet: 1\\n\\n 7. Drums: 2\\n\\n 8. Piano: 1\\n\\n\\n Now, let''s add them up:\\n\\n\\n 1 + 1 + 1 + 1 + 4 + 1 + 2 + 1 = 12\\n\\n\\n Answer: 12'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2578.13it/s]\n",
@@ -2371,38 +2351,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 14\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 136.94it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id fd34672a-ffd1-498e-a88f-283aa9d4f65d already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.92it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 46a8994f-fce6-4031-b251-1c8af31d88d2 already exists. Updating the trace.\n",
"Trace with id 2bc992c0-9832-47f1-87c3-9f6e4b18ee99 already exists. Updating the trace.\n",
@@ -2410,8 +2390,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 443.10it/s]\n",
@@ -2425,16 +2405,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1784.60it/s]\n",
@@ -2443,60 +2423,60 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 15\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 164.67it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 660c5004-35d2-4a6d-9a06-1e0b3f032f21 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 25%|██▌ | 1/4 [00:00<00:02, 1.12it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d3f33ded-170a-4b87-9b0b-987d5fb7b817 already exists. Updating the trace.\n",
"Trace with id de4e75d6-a21b-4004-925d-a9a818bd0f7c already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.02it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 1f682cab-026c-4803-8018-a45d027aa026 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 665.05it/s]\n",
@@ -2510,16 +2490,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2982.93it/s]\n",
@@ -2528,24 +2508,24 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 16\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 127.68it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 0e8910c8-703d-4766-a483-c5691125fd03 already exists. Updating the trace.Trace with id ffe67a7b-7b81-4302-b6ed-4b506570274b already exists. Updating the trace.\n",
"Trace with id e250f80e-334e-4f85-ac1f-df9a2013d578 already exists. Updating the trace.\n",
@@ -2553,22 +2533,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 534.68it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 201.71it/s]\n",
@@ -2582,16 +2562,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3902.04it/s]\n",
@@ -2600,59 +2580,59 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 17\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 0%| | 0/4 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d46e538c-832d-4eb5-ba9b-a308f666baba already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rLoading Data: 100%|██████████| 4/4 [00:00<00:00, 106.99it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id a9a202f5-e723-4d24-ae5e-ad1084a52ef8 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 75%|███████▌ | 3/4 [00:00<00:00, 3.09it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 74d1bc97-46cd-406d-8c3a-2f999aae1b2f already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 2.92it/s]\n",
"\n",
@@ -2667,16 +2647,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2888.08it/s]\n",
@@ -2684,38 +2664,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 18: 67%|██████▋ | 8/12 [01:02<00:19, 4.87s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 18\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 111.28it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d4194dd1-739a-4509-8ac8-7c3f89649ee7 already exists. Updating the trace.Trace with id 1eb770ed-ff6f-481e-8c16-b9749a44a1a6 already exists. Updating the trace.\n",
"Trace with id 7694df14-3a24-40bd-a3fa-036c2645eca3 already exists. Updating the trace.\n",
@@ -2724,8 +2704,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 585.96it/s]\n",
"\n",
@@ -2740,16 +2720,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2422.27it/s]\n",
@@ -2758,16 +2738,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 19\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 57.52it/s]\n",
@@ -2775,31 +2755,31 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 5124e2e6-2aac-4dd3-ab63-9277a7b806a7 already exists. Updating the trace.\n",
"Trace with id 1d3eceeb-ad24-40f6-8752-2f38241172cb already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.16it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.Trace with id 6c0d3a9a-bb01-4fb3-a68b-1edf66861235 already exists. Updating the trace.\n",
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 193.38it/s]\n",
@@ -2813,16 +2793,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3644.75it/s]\n",
@@ -2830,74 +2810,74 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 20: 92%|█████████▏| 11/12 [01:09<00:04, 4.32s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 20\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 125.16it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id b538075d-01af-4b76-b835-9005f3044609 already exists. Updating the trace.\n",
"Trace with id dd9d8748-4926-4bcd-902d-6a4c5cb38267 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 0%| | 0/4 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 60866bed-8020-4610-a39a-a4a730c035db already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 4.20it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 85d63f78-39c0-4753-a9fc-52202df48673 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 328.35it/s]\n",
@@ -2911,16 +2891,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['60866bed-8020-4610-a39a-a4a730c035db']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Bed: 1\\n\\n 2. Fridge: 1\\n\\n 3. Lamp: 1\\n\\n 4. Toaster: 1\\n\\n 5. Chairs: 4\\n\\n 6. Table: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 1 + 1 + 4 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1763.23it/s]\n",
@@ -2929,24 +2909,24 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.68 <= 0.86, revert\n",
"Training Step: 21\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 208.10it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id aefd17e5-9682-4420-a820-c484a63d6dcd already exists. Updating the trace.\n",
"Trace with id 04e77795-cc9b-4530-a883-5f775e3fbc76 already exists. Updating the trace.\n",
@@ -2954,22 +2934,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 4.56it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 433650a5-ca75-4867-b235-3af4a7c55c67 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 187.26it/s]\n",
@@ -2983,16 +2963,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3183.48it/s]\n",
@@ -3001,61 +2981,61 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 22\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 0%| | 0/4 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 4dad0f65-d624-48c2-a795-596c00b0535a already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 86.81it/s]\n",
"Training: 0%| | 0/4 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 1b4b3ab0-d20f-4fc2-a09c-4592a227a8e5 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.23it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 82cf82ff-d826-4bb1-847c-9938aeec8ff5 already exists. Updating the trace.\n",
"Trace with id ac43f3d4-d67d-4912-95d6-0baa09b52d9a already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 143.58it/s]\n",
@@ -3069,16 +3049,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['60866bed-8020-4610-a39a-a4a730c035db']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Bed: 1\\n\\n 2. Fridge: 1\\n\\n 3. Lamp: 1\\n\\n 4. Toaster: 1\\n\\n 5. Chairs: 4\\n\\n 6. Table: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 1 + 1 + 4 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 5440.79it/s]\n",
@@ -3086,38 +3066,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.68 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 23: : 14it [01:42, 6.13s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 23\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 91.93it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id daa5804f-1aad-4f01-b26c-6b31c57f065f already exists. Updating the trace.\n",
"Trace with id e2bfbbe0-fb79-4df5-9a7d-50c9085947bc already exists. Updating the trace.\n",
@@ -3125,22 +3105,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.56it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 63.89it/s]\n",
@@ -3154,16 +3134,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3010.90it/s]\n",
@@ -3172,59 +3152,59 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.7575757575757576 <= 0.86, revert\n",
"Training Step: 24\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 122.52it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 96c716a1-e984-4fe3-9ce0-e156ac709edb already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 0%| | 0/4 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 000a3738-1f09-40b0-9f8b-2dec63a3f7f8 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 3.21it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d71ad721-d21d-42f1-af9b-719ff026406b already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 106.06it/s]\n",
@@ -3238,16 +3218,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['840d9ed5-8222-45a9-a406-7445feae9733']\n",
"New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Table: 1\\n\\n 3. Fridge: 1\\n\\n 4. Stove: 1\\n\\n 5. Oven: 1\\n\\n 6. Toaster: 1\\n\\n 7. Couch: 1\\n\\n 8. Cars: 4\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 1 + 1 + 1 + 1 + 1 + 4 = 11\\n\\n\\n Answer: 11'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1210.01it/s]\n",
@@ -3256,8 +3236,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.86 <= 0.86, revert\n",
"Saved ckpt to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
@@ -3266,16 +3246,25 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n"
]
}
+ ],
+ "source": [
+ "train(debug=False, max_steps=12, strategy=\"constrained\",\n",
+ " raw_shots=0, bootstrap_shots=1,\n",
+ " exclude_input_fields_from_bootstrap_demos=True\n",
+ " )"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "KAyFhzrG_J4l"
+ },
"source": [
"Here is our scores for each step:\n",
"\n",
@@ -3347,13 +3336,13 @@
" resume_from_ckpt=resume_from_ckpt,\n",
" )\n",
"```"
- ],
- "metadata": {
- "id": "KAyFhzrG_J4l"
- }
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "953BV81y0JFv"
+ },
"source": [
"# 🔥 Resume Checkpoint\n",
"\n",
@@ -3362,22 +3351,11 @@
"This is easy to do.\n",
"\n",
"**Note: Ensure you copy the path you had, and replace it, as your run might create a different file name.**"
- ],
- "metadata": {
- "id": "953BV81y0JFv"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "\n",
- "ckpt_path = \"/content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\"\n",
- "\n",
- "train(debug=False, max_steps=12, strategy=\"constrained\",\n",
- " raw_shots=0, bootstrap_shots=1,\n",
- " resume_from_ckpt=ckpt_path,\n",
- " exclude_input_fields_from_bootstrap_demos=True)"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -3385,18 +3363,17 @@
"id": "kde1V1AE7Ty0",
"outputId": "52d69b69-0a3a-4780-ca26-25956cc023c7"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
"ObjectCountAdalComponent(\n",
@@ -3518,8 +3495,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 27: 0%| | 0/13 [00:00, ?it/s]\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 417.64it/s]\n",
@@ -3531,16 +3508,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 0 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 604.56it/s]\n",
@@ -3552,16 +3529,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Skipping batch 1 as acc: 1.0\n",
"No proposal can improve the subset and full set, go to next step\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 318.87it/s]\n",
@@ -3572,8 +3549,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 3\n",
"Moving batch error size: 1\n",
@@ -3593,23 +3570,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3620,16 +3597,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities specified in the input. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3640,16 +3617,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3658,15 +3635,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3676,16 +3653,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 1.0 >= 0.75\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2287.37it/s]\n",
@@ -3694,15 +3671,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.7948717948717948 <= 0.86\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 268.93it/s]\n",
@@ -3713,8 +3690,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 7\n",
"Moving batch error size: 1\n",
@@ -3732,23 +3709,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure you account for all items. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3759,16 +3736,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure each item is counted correctly. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3777,15 +3754,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3795,16 +3772,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 1.0 >= 0.875\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1910.10it/s]\n",
@@ -3813,15 +3790,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.6923076923076923 <= 0.86\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 310.31it/s]\n",
@@ -3832,8 +3809,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 11\n",
"Moving batch error size: 1\n",
@@ -3851,23 +3828,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure each item is counted correctly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3876,15 +3853,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3894,16 +3871,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 1.0 >= 0.9166666666666666\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2233.56it/s]\n",
@@ -3912,15 +3889,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.8333333333333334 <= 0.86\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 269.31it/s]\n",
@@ -3931,8 +3908,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 15\n",
"Moving batch error size: 1\n",
@@ -3951,23 +3928,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -3985,16 +3962,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4005,16 +3982,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4025,16 +4002,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4045,16 +4022,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4066,8 +4043,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -4075,8 +4052,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 317.05it/s]\n",
@@ -4087,8 +4064,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 18\n",
"Moving batch error size: 2\n",
@@ -4103,15 +4080,15 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"setting pred name Generator_outputy_pred_3 score to 1.0\n",
"setting pred name Generator_outputy_pred_2 score to 0.0\n",
@@ -4122,23 +4099,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4149,16 +4126,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4169,16 +4146,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4189,16 +4166,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4209,16 +4186,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4230,8 +4207,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -4239,8 +4216,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 104.68it/s]\n",
@@ -4251,8 +4228,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 22\n",
"Moving batch error size: 2\n",
@@ -4273,23 +4250,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4300,16 +4277,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4320,16 +4297,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4340,16 +4317,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4360,16 +4337,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4381,8 +4358,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -4390,8 +4367,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 70.79it/s]\n",
@@ -4402,8 +4379,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 22\n",
"Moving batch error size: 2\n",
@@ -4423,23 +4400,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4450,16 +4427,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure each item is counted correctly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4468,15 +4445,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass subset check: 1.0 > 0.6666666666666666\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4486,16 +4463,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass full check: 0.95 >= 0.9\n",
"Done with proposals\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2667.62it/s]\n",
@@ -4503,15 +4480,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.8333333333333334 <= 0.86\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 36: 69%|██████▉ | 9/13 [03:21<01:29, 22.39s/it]\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 154.85it/s]\n",
@@ -4522,8 +4499,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 22\n",
"Moving batch error size: 2\n",
@@ -4541,23 +4518,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4568,16 +4545,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4588,16 +4565,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4608,16 +4585,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4628,16 +4605,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4649,8 +4626,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -4658,8 +4635,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 115.54it/s]\n",
@@ -4670,8 +4647,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 23\n",
"Moving batch error size: 1\n",
@@ -4689,23 +4666,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4716,16 +4693,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4736,16 +4713,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4756,16 +4733,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4776,16 +4753,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4797,8 +4774,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -4806,8 +4783,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 138.49it/s]\n",
@@ -4818,8 +4795,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Moving batch correct size: 22\n",
"Moving batch error size: 2\n",
@@ -4839,23 +4816,23 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4866,16 +4843,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4886,16 +4863,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4906,16 +4883,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4926,16 +4903,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"\n",
@@ -4948,8 +4925,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
"Done with proposals\n",
@@ -5139,30 +5116,30 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 39: 0%| | 0/12 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 39\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 161.31it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 54e272c5-1360-462e-b773-4c58c61472ee already exists. Updating the trace.\n",
"Trace with id 0e8910c8-703d-4766-a483-c5691125fd03 already exists. Updating the trace.\n",
@@ -5171,8 +5148,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 812.53it/s]\n",
"\n",
@@ -5187,16 +5164,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['0e8910c8-703d-4766-a483-c5691125fd03']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Fridge\\n\\n 2. Chair\\n\\n 3. Bed\\n\\n 4. Oven\\n\\n 5. Microwave\\n\\n 6. Car\\n\\n\\n There are 6 objects in total.\\n\\n\\n Answer: 6'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3194.64it/s]\n",
@@ -5205,47 +5182,47 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.6521739130434783 <= 0.86, revert\n",
"Training Step: 40\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 697.57it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id daa5804f-1aad-4f01-b26c-6b31c57f065f already exists. Updating the trace.\n",
"Trace with id 71d549d2-9cc8-46ba-a7f6-d07f69263fd3 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 562.43it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id b538075d-01af-4b76-b835-9005f3044609 already exists. Updating the trace.Trace with id fd34672a-ffd1-498e-a88f-283aa9d4f65d already exists. Updating the trace.\n",
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 577.17it/s]\n",
@@ -5259,16 +5236,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3973.84it/s]\n",
@@ -5276,38 +5253,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 41: 33%|███▎ | 4/12 [00:22<00:49, 6.19s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 41\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 155.20it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n",
"Trace with id 4cd9f4ec-2648-4e85-8e17-3dae1b8558d3 already exists. Updating the trace.\n",
@@ -5316,8 +5293,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 1098.13it/s]\n",
"\n",
@@ -5332,16 +5309,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3444.16it/s]\n",
@@ -5349,38 +5326,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 42: 42%|████▏ | 5/12 [00:24<00:25, 3.71s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 42\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 268.35it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 4dad0f65-d624-48c2-a795-596c00b0535a already exists. Updating the trace.\n",
"Trace with id dd9d8748-4926-4bcd-902d-6a4c5cb38267 already exists. Updating the trace.\n",
@@ -5388,8 +5365,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 522.44it/s]\n",
"\n",
@@ -5404,16 +5381,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4718.96it/s]\n",
@@ -5422,24 +5399,24 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n",
"Training Step: 43\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 261.59it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 46a8994f-fce6-4031-b251-1c8af31d88d2 already exists. Updating the trace.Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.\n",
"\n",
@@ -5447,22 +5424,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 428.10it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id de4e75d6-a21b-4004-925d-a9a818bd0f7c already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 296.10it/s]\n",
@@ -5476,16 +5453,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4960.15it/s]\n",
@@ -5494,24 +5471,24 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n",
"Training Step: 44\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 237.83it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id aefd17e5-9682-4420-a820-c484a63d6dcd already exists. Updating the trace.\n",
"Trace with id 2bc992c0-9832-47f1-87c3-9f6e4b18ee99 already exists. Updating the trace.Trace with id 945f82c7-03d9-4f49-8267-be7abac2bce6 already exists. Updating the trace.\n",
@@ -5519,22 +5496,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 1138.91it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 394.77it/s]\n",
@@ -5548,16 +5525,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['aefd17e5-9682-4420-a820-c484a63d6dcd']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each vegetable step by step:\\n\\n\\n 1. Carrot: 1\\n\\n 2. Onion: 1\\n\\n 3. Stalk of celery: 1\\n\\n 4. Yams: 3\\n\\n 5. Garlic: 1\\n\\n 6. Head of broccoli: 1\\n\\n 7. Potato: 1\\n\\n\\n Now, let''s add them up:\\n\\n\\n 1 + 1 + 1 + 3 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1197.95it/s]\n",
@@ -5566,24 +5543,24 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.813953488372093 <= 0.86, revert\n",
"Training Step: 45\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 164.91it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 701be0ee-29e0-42f5-be04-72d2b73e3968 already exists. Updating the trace.\n",
"Trace with id e2bfbbe0-fb79-4df5-9a7d-50c9085947bc already exists. Updating the trace.\n",
@@ -5592,8 +5569,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 731.86it/s]\n",
"\n",
@@ -5608,16 +5585,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2621.44it/s]\n",
@@ -5625,61 +5602,61 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 46: 92%|█████████▏| 11/12 [00:59<00:06, 6.78s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 46\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 256.89it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 82cf82ff-d826-4bb1-847c-9938aeec8ff5 already exists. Updating the trace.\n",
"Trace with id 5124e2e6-2aac-4dd3-ab63-9277a7b806a7 already exists. Updating the trace.Trace with id a9a202f5-e723-4d24-ae5e-ad1084a52ef8 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 426.47it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d3f33ded-170a-4b87-9b0b-987d5fb7b817 already exists. Updating the trace.\n",
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 266.65it/s]\n",
@@ -5693,16 +5670,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['aefd17e5-9682-4420-a820-c484a63d6dcd']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each vegetable step by step:\\n\\n\\n 1. Carrot: 1\\n\\n 2. Onion: 1\\n\\n 3. Stalk of celery: 1\\n\\n 4. Yams: 3\\n\\n 5. Garlic: 1\\n\\n 6. Head of broccoli: 1\\n\\n 7. Potato: 1\\n\\n\\n Now, let''s add them up:\\n\\n\\n 1 + 1 + 1 + 3 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4016.92it/s]\n",
@@ -5710,38 +5687,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.813953488372093 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 47: 100%|██████████| 12/12 [01:01<00:00, 4.42s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 47\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 96.23it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 85d63f78-39c0-4753-a9fc-52202df48673 already exists. Updating the trace.Trace with id 74d1bc97-46cd-406d-8c3a-2f999aae1b2f already exists. Updating the trace.\n",
"\n",
@@ -5749,22 +5726,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 341.47it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 1d3eceeb-ad24-40f6-8752-2f38241172cb already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 167.75it/s]\n",
@@ -5778,16 +5755,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2836.52it/s]\n",
@@ -5795,61 +5772,61 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 48: : 13it [01:07, 4.63s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 48\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 189.96it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 96c716a1-e984-4fe3-9ce0-e156ac709edb already exists. Updating the trace.\n",
"Trace with id 3835ee47-6951-49ec-b285-621fc1085024 already exists. Updating the trace.Trace with id 99607986-e107-46b8-b86b-177b295983c4 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 295.41it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\n",
"Trace with id d46e538c-832d-4eb5-ba9b-a308f666baba already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 161.24it/s]\n",
@@ -5863,16 +5840,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s list and count the vegetables mentioned:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (two cabbages)\\n\\n 4. Garlic\\n\\n 5. Carrot\\n\\n 6. Broccoli (head of broccoli)\\n\\n 7. Potato\\n\\n 8. Celery (stalk of celery)\\n\\n 9. Lettuce (lettuce head)\\n\\n\\n Now, let''s count each vegetable:\\n\\n\\n 1. Yam: 1\\n\\n 2. Cauliflower: 1\\n\\n 3. Cabbages: 2\\n\\n 4. Garlic: 1\\n\\n 5. Carrot: 1\\n\\n 6. Broccoli: 1\\n\\n 7. Potato: 1\\n\\n 8. Celery: 1\\n\\n 9. Lettuce: 1\\n\\n\\n Adding them up:\\n\\n\\n 1 + 1 + 2 + 1 + 1 + 1 + 1 + 1 + 1 = 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1430.74it/s]\n",
@@ -5881,24 +5858,24 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n",
"Training Step: 49\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 122.71it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id ffe67a7b-7b81-4302-b6ed-4b506570274b already exists. Updating the trace.\n",
"Trace with id 8895d6bd-eab0-48af-ad4b-51f8007258b1 already exists. Updating the trace.\n",
@@ -5907,8 +5884,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 421.38it/s]\n",
"\n",
@@ -5923,16 +5900,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3168.14it/s]\n",
@@ -5940,38 +5917,38 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 50: : 16it [01:42, 9.33s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 50\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 108.30it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id c42fea48-1b90-4388-92c4-b65b4356a3a2 already exists. Updating the trace.\n",
"Trace with id 660c5004-35d2-4a6d-9a06-1e0b3f032f21 already exists. Updating the trace.\n",
@@ -5979,8 +5956,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:03<00:00, 1.04it/s]\n",
"\n",
@@ -5995,16 +5972,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
"New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2261.91it/s]\n",
@@ -6012,49 +5989,52 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.8048780487804879 <= 0.86, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 50: 100%|██████████| 12/12 [01:49<00:00, 9.15s/it]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Saved ckpt to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
"Training time: 352.5873613357544s\n",
"ckpt_file: /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
}
+ ],
+ "source": [
+ "\n",
+ "ckpt_path = \"/content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\"\n",
+ "\n",
+ "train(debug=False, max_steps=12, strategy=\"constrained\",\n",
+ " raw_shots=0, bootstrap_shots=1,\n",
+ " resume_from_ckpt=ckpt_path,\n",
+ " exclude_input_fields_from_bootstrap_demos=True)"
]
},
{
"cell_type": "markdown",
- "source": [
- "I decide to try more, this time, using strategy \"random\". And in the bootstrap demo, there is one shot, but I ensure I also add the \"input\" in the demonstration."
- ],
"metadata": {
"id": "m5fZGQqLE78r"
- }
+ },
+ "source": [
+ "I decided to try one more run, this time using the \"random\" strategy. The bootstrap demo still uses one shot, but this time I also include the \"input\" fields in the demonstration."
+ ]
},
{
"cell_type": "code",
- "source": [
- "\n",
- "train(debug=False, max_steps=12, strategy=\"random\",\n",
- " raw_shots=0, bootstrap_shots=1,\n",
- " resume_from_ckpt=ckpt_path,\n",
- " exclude_input_fields_from_bootstrap_demos=False)"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -6062,18 +6042,17 @@
"id": "78JAv4ULEn07",
"outputId": "e87bb360-fc26-4dbd-d163-86ab32c292df"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
"ObjectCountAdalComponent(\n",
@@ -6195,8 +6174,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 51: 0%| | 0/13 [00:00, ?it/s]\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 415.27it/s]\n",
@@ -6207,22 +6186,22 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"setting pred name Generator_outputy_pred_2 score to 1.0\n",
"setting pred name Generator_outputy_pred_1 score to 1.0\n",
@@ -6233,8 +6212,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2199.38it/s]\n",
@@ -6243,16 +6222,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.86 <= 0.86\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 402.10it/s]\n",
@@ -6263,8 +6242,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_2 score to 1.0\n",
@@ -6276,8 +6255,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1760.33it/s]\n",
@@ -6286,16 +6265,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.86 <= 0.86\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 571.26it/s]\n",
@@ -6306,8 +6285,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_2 score to 1.0\n",
@@ -6319,8 +6298,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2074.29it/s]\n",
@@ -6328,15 +6307,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer step: 0.88 > 0.86\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 100/100 [00:00<00:00, 5848.08it/s]\n",
@@ -6345,15 +6324,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 297.78it/s]\n",
@@ -6364,8 +6343,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_1 score to 1.0\n",
@@ -6377,8 +6356,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1274.72it/s]\n",
@@ -6386,15 +6365,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer step: 0.94 > 0.88\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 100/100 [00:00<00:00, 6831.78it/s]\n",
@@ -6403,15 +6382,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 152.84it/s]\n",
@@ -6422,8 +6401,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_0 score to 1.0\n",
@@ -6435,8 +6414,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2011.16it/s]\n",
@@ -6445,16 +6424,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.8333333333333334 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 93.66it/s]\n",
@@ -6465,8 +6444,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_3 score to 1.0\n",
@@ -6478,8 +6457,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4572.35it/s]\n",
@@ -6487,16 +6466,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.94 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 57: 46%|████▌ | 6/13 [02:54<03:02, 26.03s/it]\n",
@@ -6508,8 +6487,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_2 score to 1.0\n",
@@ -6521,8 +6500,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1614.47it/s]\n",
@@ -6531,16 +6510,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.7333333333333333 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 137.96it/s]\n",
@@ -6551,8 +6530,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_2 score to 1.0\n",
@@ -6564,8 +6543,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3560.17it/s]\n",
@@ -6574,16 +6553,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.8461538461538461 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 93.90it/s]\n",
@@ -6594,8 +6573,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_1 score to 1.0\n",
@@ -6607,8 +6586,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1770.11it/s]\n",
@@ -6617,16 +6596,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.9069767441860465 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 314.86it/s]\n",
@@ -6637,8 +6616,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_0 score to 1.0\n",
@@ -6650,8 +6629,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 7188.43it/s]\n",
@@ -6660,16 +6639,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.8666666666666667 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 95.68it/s]\n",
@@ -6680,8 +6659,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_1 score to 1.0\n",
@@ -6693,8 +6672,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3036.62it/s]\n",
@@ -6702,16 +6681,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.9069767441860465 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 62: 85%|████████▍ | 11/13 [04:44<00:40, 20.14s/it]\n",
@@ -6723,8 +6702,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Loss backward...\n",
"setting pred name Generator_outputy_pred_2 score to 1.0\n",
@@ -6736,8 +6715,8 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 5035.06it/s]\n",
@@ -6745,16 +6724,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Optimizer revert: 0.9069767441860465 <= 0.94\n",
"Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 62: 92%|█████████▏| 12/13 [04:51<00:24, 24.28s/it]\n",
@@ -6762,8 +6741,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Reached max steps\n",
"cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
@@ -6950,52 +6929,52 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training Step: 63: 0%| | 0/12 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 63\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 175.38it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id fd34672a-ffd1-498e-a88f-283aa9d4f65d already exists. Updating the trace.\n",
"Trace with id 82cf82ff-d826-4bb1-847c-9938aeec8ff5 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 0%| | 0/4 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 46a8994f-fce6-4031-b251-1c8af31d88d2 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 4.32it/s]\n",
"\n",
@@ -7010,16 +6989,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['000a3738-1f09-40b0-9f8b-2dec63a3f7f8']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have two heads of broccoli, an apple, a lettuce head, and two onions.\\n How many vegetables do I have?\\nExample: 'Let''s list each item individually and determine if it is a vegetable:\\n\\n\\n 1. Two heads of broccoli (vegetables)\\n\\n 2. An apple (not a vegetable)\\n\\n 3. A lettuce head (vegetable)\\n\\n 4. Two onions (vegetables)\\n\\n\\n Now, let''s count the vegetables:\\n\\n\\n 1. Two heads of broccoli\\n\\n 2. One lettuce head\\n\\n 3. Two onions\\n\\n\\n Total number of vegetables:\\n\\n 2 (broccoli) + 1 (lettuce) + 2 (onions) = 5\\n\\n\\n Answer: 5'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2838.94it/s]\n",
@@ -7028,61 +7007,61 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.42857142857142855 <= 0.94, revert\n",
"Training Step: 64\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 173.87it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 60866bed-8020-4610-a39a-a4a730c035db already exists. Updating the trace.\n",
"Trace with id 7694df14-3a24-40bd-a3fa-036c2645eca3 already exists. Updating the trace."
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 0%| | 0/4 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 3835ee47-6951-49ec-b285-621fc1085024 already exists. Updating the trace.\n",
"\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:00<00:00, 4.64it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d3f33ded-170a-4b87-9b0b-987d5fb7b817 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 1138.44it/s]\n",
@@ -7096,16 +7075,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['000a3738-1f09-40b0-9f8b-2dec63a3f7f8']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have two heads of broccoli, an apple, a lettuce head, and two onions.\\n How many vegetables do I have?\\nExample: 'Let''s list each item individually and determine if it is a vegetable:\\n\\n\\n 1. Two heads of broccoli (vegetables)\\n\\n 2. An apple (not a vegetable)\\n\\n 3. A lettuce head (vegetable)\\n\\n 4. Two onions (vegetables)\\n\\n\\n Now, let''s count the vegetables:\\n\\n\\n 1. Two heads of broccoli\\n\\n 2. One lettuce head\\n\\n 3. Two onions\\n\\n\\n Total number of vegetables:\\n\\n 2 (broccoli) + 1 (lettuce) + 2 (onions) = 5\\n\\n\\n Answer: 5'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2971.02it/s]\n",
@@ -7114,73 +7093,73 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.42857142857142855 <= 0.94, revert\n",
"Training Step: 65\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 201.47it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 99607986-e107-46b8-b86b-177b295983c4 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 0%| | 0/4 [00:00, ?it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id ffe67a7b-7b81-4302-b6ed-4b506570274b already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 50%|█████ | 2/4 [00:00<00:00, 2.54it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 71d549d2-9cc8-46ba-a7f6-d07f69263fd3 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 2.89it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 4cd9f4ec-2648-4e85-8e17-3dae1b8558d3 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 402.70it/s]\n",
@@ -7194,16 +7173,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1635.33it/s]\n",
@@ -7211,15 +7190,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Pass validation: 0.96 > 0.94\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 100/100 [00:00<00:00, 3294.35it/s]\n",
@@ -7228,15 +7207,15 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 66\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 186.04it/s]\n",
@@ -7244,44 +7223,44 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id fe9b883c-4f47-44f7-a388-b03a2fb10413 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 50%|█████ | 2/4 [00:01<00:01, 1.30it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 12a6ff3d-f54d-4d89-b5f0-1aec30e96398 already exists. Updating the trace.\n",
"Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:02<00:00, 1.46it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 636.54it/s]\n",
@@ -7295,16 +7274,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 1120.89it/s]\n",
@@ -7313,16 +7292,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.96 <= 0.96, revert\n",
"Training Step: 67\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 0%| | 0/4 [00:00, ?it/s]\u001b[A\n",
@@ -7331,57 +7310,57 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 5124e2e6-2aac-4dd3-ab63-9277a7b806a7 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 25%|██▌ | 1/4 [00:01<00:05, 1.78s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id ac43f3d4-d67d-4912-95d6-0baa09b52d9a already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 75%|███████▌ | 3/4 [00:02<00:00, 1.63it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d71ad721-d21d-42f1-af9b-719ff026406b already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:03<00:00, 1.04it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id c42fea48-1b90-4388-92c4-b65b4356a3a2 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 420.84it/s]\n",
@@ -7395,16 +7374,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4202.88it/s]\n",
@@ -7412,30 +7391,30 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.96 <= 0.96, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 68: 67%|██████▋ | 8/12 [02:02<00:47, 11.99s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 68\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 77.30it/s]\n",
@@ -7443,43 +7422,43 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id d4194dd1-739a-4509-8ac8-7c3f89649ee7 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 75%|███████▌ | 3/4 [00:01<00:00, 2.62it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 1eb770ed-ff6f-481e-8c16-b9749a44a1a6 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:02<00:00, 1.46it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 54e272c5-1360-462e-b773-4c58c61472ee already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 212.56it/s]\n",
@@ -7493,16 +7472,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3305.62it/s]\n",
@@ -7510,30 +7489,30 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.96 <= 0.96, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 69: 75%|███████▌ | 9/12 [02:09<00:32, 10.69s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 69\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 84.70it/s]\n",
@@ -7541,8 +7520,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 0e8910c8-703d-4766-a483-c5691125fd03 already exists. Updating the trace.\n",
"Trace with id 74d1bc97-46cd-406d-8c3a-2f999aae1b2f already exists. Updating the trace.\n",
@@ -7550,22 +7529,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 2.67it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id de4e75d6-a21b-4004-925d-a9a818bd0f7c already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 331.49it/s]\n",
@@ -7579,16 +7558,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['000a3738-1f09-40b0-9f8b-2dec63a3f7f8']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have two heads of broccoli, an apple, a lettuce head, and two onions.\\n How many vegetables do I have?\\nExample: 'Let''s list each item individually and determine if it is a vegetable:\\n\\n\\n 1. Two heads of broccoli (vegetables)\\n\\n 2. An apple (not a vegetable)\\n\\n 3. A lettuce head (vegetable)\\n\\n 4. Two onions (vegetables)\\n\\n\\n Now, let''s count the vegetables:\\n\\n\\n 1. Two heads of broccoli\\n\\n 2. One lettuce head\\n\\n 3. Two onions\\n\\n\\n Total number of vegetables:\\n\\n 2 (broccoli) + 1 (lettuce) + 2 (onions) = 5\\n\\n\\n Answer: 5'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4604.98it/s]\n",
@@ -7597,16 +7576,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.4 <= 0.96, revert\n",
"Training Step: 70\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 169.70it/s]\n",
@@ -7614,31 +7593,31 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id e2bfbbe0-fb79-4df5-9a7d-50c9085947bc already exists. Updating the trace.\n",
"Trace with id 6c34d6e5-0e3d-4243-834e-fd6c5883f467 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:01<00:00, 2.45it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 1b4b3ab0-d20f-4fc2-a09c-4592a227a8e5 already exists. Updating the trace.\n",
"Trace with id aefd17e5-9682-4420-a820-c484a63d6dcd already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 285.47it/s]\n",
@@ -7652,16 +7631,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4087.46it/s]\n",
@@ -7669,30 +7648,30 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.96 <= 0.96, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 71: 100%|██████████| 12/12 [02:17<00:00, 6.07s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 71\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 87.52it/s]\n",
@@ -7700,57 +7679,57 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 6c0d3a9a-bb01-4fb3-a68b-1edf66861235 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 50%|█████ | 2/4 [00:01<00:01, 1.37it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 234e39df-1bc4-41df-a515-895cb2614a53 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 75%|███████▌ | 3/4 [00:01<00:00, 1.92it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 8895d6bd-eab0-48af-ad4b-51f8007258b1 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:02<00:00, 1.87it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 4dad0f65-d624-48c2-a795-596c00b0535a already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 262.50it/s]\n",
@@ -7764,16 +7743,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 4407.91it/s]\n",
@@ -7781,30 +7760,30 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.96 <= 0.96, revert\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Training Step: 72: : 13it [02:23, 6.04s/it]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Training Step: 72\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 113.31it/s]\n",
@@ -7812,57 +7791,57 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 85d63f78-39c0-4753-a9fc-52202df48673 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 50%|█████ | 2/4 [00:01<00:01, 1.82it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 433650a5-ca75-4867-b235-3af4a7c55c67 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\rTraining: 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id e250f80e-334e-4f85-ac1f-df9a2013d578 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:02<00:00, 1.86it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 1d3eceeb-ad24-40f6-8752-2f38241172cb already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 170.72it/s]\n",
@@ -7876,16 +7855,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['000a3738-1f09-40b0-9f8b-2dec63a3f7f8']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have two heads of broccoli, an apple, a lettuce head, and two onions.\\n How many vegetables do I have?\\nExample: 'Let''s list each item individually and determine if it is a vegetable:\\n\\n\\n 1. Two heads of broccoli (vegetables)\\n\\n 2. An apple (not a vegetable)\\n\\n 3. A lettuce head (vegetable)\\n\\n 4. Two onions (vegetables)\\n\\n\\n Now, let''s count the vegetables:\\n\\n\\n 1. Two heads of broccoli\\n\\n 2. One lettuce head\\n\\n 3. Two onions\\n\\n\\n Total number of vegetables:\\n\\n 2 (broccoli) + 1 (lettuce) + 2 (onions) = 5\\n\\n\\n Answer: 5'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 3970.90it/s]\n",
@@ -7894,16 +7873,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.4 <= 0.96, revert\n",
"Training Step: 73\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 73.23it/s]\n",
@@ -7911,31 +7890,31 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id daa5804f-1aad-4f01-b26c-6b31c57f065f already exists. Updating the trace.\n",
"Trace with id dd9d8748-4926-4bcd-902d-6a4c5cb38267 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:02<00:00, 1.80it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 04e77795-cc9b-4530-a883-5f775e3fbc76 already exists. Updating the trace.\n",
"Trace with id 1f682cab-026c-4803-8018-a45d027aa026 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 211.00it/s]\n",
@@ -7949,16 +7928,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['000a3738-1f09-40b0-9f8b-2dec63a3f7f8']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have two heads of broccoli, an apple, a lettuce head, and two onions.\\n How many vegetables do I have?\\nExample: 'Let''s list each item individually and determine if it is a vegetable:\\n\\n\\n 1. Two heads of broccoli (vegetables)\\n\\n 2. An apple (not a vegetable)\\n\\n 3. A lettuce head (vegetable)\\n\\n 4. Two onions (vegetables)\\n\\n\\n Now, let''s count the vegetables:\\n\\n\\n 1. Two heads of broccoli\\n\\n 2. One lettuce head\\n\\n 3. Two onions\\n\\n\\n Total number of vegetables:\\n\\n 2 (broccoli) + 1 (lettuce) + 2 (onions) = 5\\n\\n\\n Answer: 5'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 2226.42it/s]\n",
@@ -7967,16 +7946,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.4 <= 0.96, revert\n",
"Training Step: 74\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 93.65it/s]\n",
@@ -7984,8 +7963,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id 945f82c7-03d9-4f49-8267-be7abac2bce6 already exists. Updating the trace.\n",
"Trace with id a9a202f5-e723-4d24-ae5e-ad1084a52ef8 already exists. Updating the trace.\n",
@@ -7993,22 +7972,22 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Training: 100%|██████████| 4/4 [00:02<00:00, 1.59it/s]\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Trace with id b538075d-01af-4b76-b835-9005f3044609 already exists. Updating the trace.\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 4/4 [00:00<00:00, 115.74it/s]\n",
@@ -8022,16 +8001,16 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"sampled_augmented_demos: ['b538075d-01af-4b76-b835-9005f3044609']\n",
"New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a blackberry, a raspberry, a peach, a head of broccoli, a plum,\\n an orange, two bananas, a grape, two garlics, a nectarine, a lettuce head, and an\\n apple. How many fruits do I have?\\nExample: 'Let''s list each item and identify whether it is a fruit:\\n\\n\\n 1. Blackberry - Fruit\\n\\n 2. Raspberry - Fruit\\n\\n 3. Peach - Fruit\\n\\n 4. Head of broccoli - Not a fruit\\n\\n 5. Plum - Fruit\\n\\n 6. Orange - Fruit\\n\\n 7. Two bananas - Fruits (2 bananas)\\n\\n 8. Grape - Fruit\\n\\n 9. Two garlics - Not fruits\\n\\n 10. Nectarine - Fruit\\n\\n 11. Lettuce head - Not a fruit\\n\\n 12. Apple - Fruit\\n\\n\\n Now, let''s count the fruits:\\n\\n\\n 1. Blackberry\\n\\n 2. Raspberry\\n\\n 3. Peach\\n\\n 4. Plum\\n\\n 5. Orange\\n\\n 6. Two bananas (counted as 2)\\n\\n 7. Grape\\n\\n 8. Nectarine\\n\\n 9. Apple\\n\\n\\n Total number of fruits:\\n\\n 1 + 1 + 1 + 1 + 1 + 2 + 1 + 1 + 1 = 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n",
"Loading Data: 100%|██████████| 50/50 [00:00<00:00, 634.93it/s]\n",
@@ -8040,8 +8019,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Fail validation: 0.4 <= 0.96, revert\n",
"Saved ckpt to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
@@ -8050,16 +8029,26 @@
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"\n"
]
}
+ ],
+ "source": [
+ "\n",
+ "train(debug=False, max_steps=12, strategy=\"random\",\n",
+ " raw_shots=0, bootstrap_shots=1,\n",
+ " resume_from_ckpt=ckpt_path,\n",
+ " exclude_input_fields_from_bootstrap_demos=False)"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "xTB4lO3PFPnP"
+ },
"source": [
"Finally, we got 96% on the val and 95% on the test!!! This is really close to GPT4o's performance. This took us 72 steps!\n",
"\n",
@@ -8088,32 +8077,43 @@
"\n",
"\n",
"You will see all steps record from the log."
- ],
- "metadata": {
- "id": "xTB4lO3PFPnP"
- }
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "Happy Optimizing!!!"
- ],
"metadata": {
"id": "Fr0V3XNCHAis"
- }
+ },
+ "source": [
+ "Happy Optimizing!!!"
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "3Wnvqs3RyI_z"
+ },
"source": [
"# Issues and feedback\n",
"\n",
"If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n",
"\n",
"For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)."
- ],
- "metadata": {
- "id": "3Wnvqs3RyI_z"
- }
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
}
- ]
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/use_cases/question_answering/bbh/object_count/diagnose.py b/use_cases/question_answering/bbh/object_count/diagnose.py
index cb06b903..e0a99d54 100644
--- a/use_cases/question_answering/bbh/object_count/diagnose.py
+++ b/use_cases/question_answering/bbh/object_count/diagnose.py
@@ -17,7 +17,9 @@ def prepare_task(self, sample: Example):
def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:
y_label = -1
- if y_pred and y_pred.data:
+ if (
+ y_pred is not None and y_pred.data is not None
+ ): # explicit None checks: the old truthiness test (`if y_pred and y_pred.data`) wrongly discarded a valid prediction of 0
y_label = y_pred.data
return self.eval_fn, {"y": y_label, "y_gt": sample.answer}
diff --git a/use_cases/question_answering/bbh/object_count/train_new.py b/use_cases/question_answering/bbh/object_count/train_new.py
index 86654a92..c4c64fbc 100644
--- a/use_cases/question_answering/bbh/object_count/train_new.py
+++ b/use_cases/question_answering/bbh/object_count/train_new.py
@@ -43,7 +43,9 @@ def prepare_eval(
self, sample: Example, y_pred: adal.GeneratorOutput
) -> Tuple[float, Dict[str, Any]]:
y_label = -1
- if y_pred and y_pred.data:
+ if (
+ y_pred is not None and y_pred.data is not None
+ ): # explicit None checks: the old truthiness test (`if y_pred and y_pred.data`) wrongly discarded a valid prediction of 0
y_label = y_pred.data
return self.eval_fn, {"y": y_label, "y_gt": sample.answer}
diff --git a/use_cases/question_answering/bbh/word_sorting/diagnose.py b/use_cases/question_answering/bbh/word_sorting/diagnose.py
index dcd490fc..2faa9e76 100644
--- a/use_cases/question_answering/bbh/word_sorting/diagnose.py
+++ b/use_cases/question_answering/bbh/word_sorting/diagnose.py
@@ -45,7 +45,9 @@ def evaluate_one_sample(
self, sample: Example, y_pred: adal.GeneratorOutput
) -> float:
y_label = ""
- if y_pred and y_pred.data:
+ if (
+ y_pred is not None and y_pred.data is not None
+ ): # explicit None checks: the old truthiness test (`if y_pred and y_pred.data`) also rejected falsy-but-valid data (e.g. 0)
y_label = y_pred.data
return self.eval_fn(
question=sample.question, gt_answer=sample.answer, pred_answer=y_label
diff --git a/use_cases/question_answering/bbh/word_sorting/train.py b/use_cases/question_answering/bbh/word_sorting/train.py
index a2b1a418..4d1af9e3 100644
--- a/use_cases/question_answering/bbh/word_sorting/train.py
+++ b/use_cases/question_answering/bbh/word_sorting/train.py
@@ -59,7 +59,9 @@ def evaluate_one_sample(
self, sample: Example, y_pred: adal.GeneratorOutput
) -> float:
y_label = ""
- if y_pred and y_pred.data:
+ if (
+ y_pred is not None and y_pred.data is not None
+ ): # explicit None checks: the old truthiness test (`if y_pred and y_pred.data`) also rejected falsy-but-valid data (e.g. 0)
y_label = y_pred.data
return self.eval_fn(
diff --git a/use_cases/question_answering/bbh/word_sorting/train_paper.py b/use_cases/question_answering/bbh/word_sorting/train_paper.py
index 6d3bd477..00a84830 100644
--- a/use_cases/question_answering/bbh/word_sorting/train_paper.py
+++ b/use_cases/question_answering/bbh/word_sorting/train_paper.py
@@ -59,7 +59,9 @@ def evaluate_one_sample(
self, sample: Example, y_pred: adal.GeneratorOutput
) -> float:
y_label = ""
- if y_pred and y_pred.data:
+ if (
+ y_pred is not None and y_pred.data is not None
+ ): # explicit None checks: the old truthiness test (`if y_pred and y_pred.data`) also rejected falsy-but-valid data (e.g. 0)
y_label = y_pred.data
return self.eval_fn(
From 838fb1f38b0aaf0f544ba23be34e7c326bdf124f Mon Sep 17 00:00:00 2001
From: Li Yin
Date: Mon, 21 Oct 2024 12:21:23 +0800
Subject: [PATCH 2/2] update poetry lock as the test failed
---
adalflow/poetry.lock | 187 ++++++++++++++++++++++---------------------
1 file changed, 96 insertions(+), 91 deletions(-)
diff --git a/adalflow/poetry.lock b/adalflow/poetry.lock
index f24854be..f50304a7 100644
--- a/adalflow/poetry.lock
+++ b/adalflow/poetry.lock
@@ -251,7 +251,7 @@ files = [
name = "boto3"
version = "1.35.19"
description = "The AWS SDK for Python"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "boto3-1.35.19-py3-none-any.whl", hash = "sha256:84b3fe1727945bc3cada832d969ddb3dc0d08fce1677064ca8bdc13a89c1a143"},
@@ -2162,56 +2162,61 @@ files = [
[[package]]
name = "nvidia-cublas-cu12"
-version = "12.1.3.1"
+version = "12.4.5.8"
description = "CUBLAS native runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
- {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"},
+ {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3"},
+ {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b"},
+ {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-win_amd64.whl", hash = "sha256:5a796786da89203a0657eda402bcdcec6180254a8ac22d72213abc42069522dc"},
]
[[package]]
name = "nvidia-cuda-cupti-cu12"
-version = "12.1.105"
+version = "12.4.127"
description = "CUDA profiling tools runtime libs."
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
- {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"},
+ {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a"},
+ {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb"},
+ {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922"},
]
[[package]]
name = "nvidia-cuda-nvrtc-cu12"
-version = "12.1.105"
+version = "12.4.127"
description = "NVRTC native runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
- {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:a961b2f1d5f17b14867c619ceb99ef6fcec12e46612711bcec78eb05068a60ec"},
]
[[package]]
name = "nvidia-cuda-runtime-cu12"
-version = "12.1.105"
+version = "12.4.127"
description = "CUDA Runtime native Libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
- {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"},
+ {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3"},
+ {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5"},
+ {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:09c2e35f48359752dfa822c09918211844a3d93c100a715d79b59591130c5e1e"},
]
[[package]]
name = "nvidia-cudnn-cu12"
-version = "8.9.2.26"
+version = "9.1.0.70"
description = "cuDNN runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
+ {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"},
+ {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"},
]
[package.dependencies]
@@ -2219,35 +2224,41 @@ nvidia-cublas-cu12 = "*"
[[package]]
name = "nvidia-cufft-cu12"
-version = "11.0.2.54"
+version = "11.2.1.3"
description = "CUFFT native runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
- {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"},
+ {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399"},
+ {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9"},
+ {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-win_amd64.whl", hash = "sha256:d802f4954291101186078ccbe22fc285a902136f974d369540fd4a5333d1440b"},
]
+[package.dependencies]
+nvidia-nvjitlink-cu12 = "*"
+
[[package]]
name = "nvidia-curand-cu12"
-version = "10.3.2.106"
+version = "10.3.5.147"
description = "CURAND native runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
- {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"},
+ {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9"},
+ {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b"},
+ {file = "nvidia_curand_cu12-10.3.5.147-py3-none-win_amd64.whl", hash = "sha256:f307cc191f96efe9e8f05a87096abc20d08845a841889ef78cb06924437f6771"},
]
[[package]]
name = "nvidia-cusolver-cu12"
-version = "11.4.5.107"
+version = "11.6.1.9"
description = "CUDA solver native runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
- {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"},
+ {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e"},
+ {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260"},
+ {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-win_amd64.whl", hash = "sha256:e77314c9d7b694fcebc84f58989f3aa4fb4cb442f12ca1a9bde50f5e8f6d1b9c"},
]
[package.dependencies]
@@ -2257,13 +2268,14 @@ nvidia-nvjitlink-cu12 = "*"
[[package]]
name = "nvidia-cusparse-cu12"
-version = "12.1.0.106"
+version = "12.3.1.170"
description = "CUSPARSE native runtime libraries"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
- {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"},
+ {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3"},
+ {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1"},
+ {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-win_amd64.whl", hash = "sha256:9bc90fb087bc7b4c15641521f31c0371e9a612fc2ba12c338d3ae032e6b6797f"},
]
[package.dependencies]
@@ -2271,35 +2283,36 @@ nvidia-nvjitlink-cu12 = "*"
[[package]]
name = "nvidia-nccl-cu12"
-version = "2.19.3"
+version = "2.21.5"
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"},
+ {file = "nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0"},
]
[[package]]
name = "nvidia-nvjitlink-cu12"
-version = "12.6.68"
+version = "12.4.127"
description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b"},
- {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab"},
- {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-win_amd64.whl", hash = "sha256:a55744c98d70317c5e23db14866a8cc2b733f7324509e941fc96276f9f37801d"},
+ {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"},
+ {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"},
+ {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1"},
]
[[package]]
name = "nvidia-nvtx-cu12"
-version = "12.1.105"
+version = "12.4.127"
description = "NVIDIA Tools Extension"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
- {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"},
+ {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3"},
+ {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a"},
+ {file = "nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485"},
]
[[package]]
@@ -3130,7 +3143,7 @@ pyasn1 = ">=0.1.3"
name = "s3transfer"
version = "0.10.2"
description = "An Amazon S3 Transfer Manager"
-optional = false
+optional = true
python-versions = ">=3.8"
files = [
{file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"},
@@ -3274,13 +3287,13 @@ sqlcipher = ["sqlcipher3_binary"]
[[package]]
name = "sympy"
-version = "1.13.2"
+version = "1.13.1"
description = "Computer algebra system (CAS) in Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "sympy-1.13.2-py3-none-any.whl", hash = "sha256:c51d75517712f1aed280d4ce58506a4a88d635d6b5dd48b39102a7ae1f3fcfe9"},
- {file = "sympy-1.13.2.tar.gz", hash = "sha256:401449d84d07be9d0c7a46a64bd54fe097667d5e7181bfe67ec777be9e01cb13"},
+ {file = "sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8"},
+ {file = "sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f"},
]
[package.dependencies]
@@ -3521,36 +3534,28 @@ files = [
[[package]]
name = "torch"
-version = "2.2.2"
+version = "2.5.0"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bc889d311a855dd2dfd164daf8cc903a6b7273a747189cebafdd89106e4ad585"},
- {file = "torch-2.2.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:15dffa4cc3261fa73d02f0ed25f5fa49ecc9e12bf1ae0a4c1e7a88bbfaad9030"},
- {file = "torch-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:11e8fe261233aeabd67696d6b993eeb0896faa175c6b41b9a6c9f0334bdad1c5"},
- {file = "torch-2.2.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:b2e2200b245bd9f263a0d41b6a2dab69c4aca635a01b30cca78064b0ef5b109e"},
- {file = "torch-2.2.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:877b3e6593b5e00b35bbe111b7057464e76a7dd186a287280d941b564b0563c2"},
- {file = "torch-2.2.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:ad4c03b786e074f46606f4151c0a1e3740268bcf29fbd2fdf6666d66341c1dcb"},
- {file = "torch-2.2.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:32827fa1fbe5da8851686256b4cd94cc7b11be962862c2293811c94eea9457bf"},
- {file = "torch-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:f9ef0a648310435511e76905f9b89612e45ef2c8b023bee294f5e6f7e73a3e7c"},
- {file = "torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:95b9b44f3bcebd8b6cd8d37ec802048c872d9c567ba52c894bba90863a439059"},
- {file = "torch-2.2.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:49aa4126ede714c5aeef7ae92969b4b0bbe67f19665106463c39f22e0a1860d1"},
- {file = "torch-2.2.2-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:cf12cdb66c9c940227ad647bc9cf5dba7e8640772ae10dfe7569a0c1e2a28aca"},
- {file = "torch-2.2.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:89ddac2a8c1fb6569b90890955de0c34e1724f87431cacff4c1979b5f769203c"},
- {file = "torch-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:451331406b760f4b1ab298ddd536486ab3cfb1312614cfe0532133535be60bea"},
- {file = "torch-2.2.2-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:eb4d6e9d3663e26cd27dc3ad266b34445a16b54908e74725adb241aa56987533"},
- {file = "torch-2.2.2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:bf9558da7d2bf7463390b3b2a61a6a3dbb0b45b161ee1dd5ec640bf579d479fc"},
- {file = "torch-2.2.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cd2bf7697c9e95fb5d97cc1d525486d8cf11a084c6af1345c2c2c22a6b0029d0"},
- {file = "torch-2.2.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b421448d194496e1114d87a8b8d6506bce949544e513742b097e2ab8f7efef32"},
- {file = "torch-2.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:3dbcd563a9b792161640c0cffe17e3270d85e8f4243b1f1ed19cca43d28d235b"},
- {file = "torch-2.2.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:31f4310210e7dda49f1fb52b0ec9e59382cfcb938693f6d5378f25b43d7c1d29"},
- {file = "torch-2.2.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c795feb7e8ce2e0ef63f75f8e1ab52e7fd5e1a4d7d0c31367ade1e3de35c9e95"},
- {file = "torch-2.2.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a6e5770d68158d07456bfcb5318b173886f579fdfbf747543901ce718ea94782"},
- {file = "torch-2.2.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:67dcd726edff108e2cd6c51ff0e416fd260c869904de95750e80051358680d24"},
- {file = "torch-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:539d5ef6c4ce15bd3bd47a7b4a6e7c10d49d4d21c0baaa87c7d2ef8698632dfb"},
- {file = "torch-2.2.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:dff696de90d6f6d1e8200e9892861fd4677306d0ef604cb18f2134186f719f82"},
- {file = "torch-2.2.2-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:3a4dd910663fd7a124c056c878a52c2b0be4a5a424188058fe97109d4436ee42"},
+ {file = "torch-2.5.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:7f179373a047b947dec448243f4e6598a1c960fa3bb978a9a7eecd529fbc363f"},
+ {file = "torch-2.5.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:15fbc95e38d330e5b0ef1593b7bc0a19f30e5bdad76895a5cffa1a6a044235e9"},
+ {file = "torch-2.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:f499212f1cffea5d587e5f06144630ed9aa9c399bba12ec8905798d833bd1404"},
+ {file = "torch-2.5.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:c54db1fade17287aabbeed685d8e8ab3a56fea9dd8d46e71ced2da367f09a49f"},
+ {file = "torch-2.5.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:499a68a756d3b30d10f7e0f6214dc3767b130b797265db3b1c02e9094e2a07be"},
+ {file = "torch-2.5.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9f3df8138a1126a851440b7d5a4869bfb7c9cc43563d64fd9d96d0465b581024"},
+ {file = "torch-2.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:b81da3bdb58c9de29d0e1361e52f12fcf10a89673f17a11a5c6c7da1cb1a8376"},
+ {file = "torch-2.5.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ba135923295d564355326dc409b6b7f5bd6edc80f764cdaef1fb0a1b23ff2f9c"},
+ {file = "torch-2.5.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:2dd40c885a05ef7fe29356cca81be1435a893096ceb984441d6e2c27aff8c6f4"},
+ {file = "torch-2.5.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc52d603d87fe1da24439c0d5fdbbb14e0ae4874451d53f0120ffb1f6c192727"},
+ {file = "torch-2.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea718746469246cc63b3353afd75698a288344adb55e29b7f814a5d3c0a7c78d"},
+ {file = "torch-2.5.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6de1fd253e27e7f01f05cd7c37929ae521ca23ca4620cfc7c485299941679112"},
+ {file = "torch-2.5.0-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:83dcf518685db20912b71fc49cbddcc8849438cdb0e9dcc919b02a849e2cd9e8"},
+ {file = "torch-2.5.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:65e0a60894435608334d68c8811e55fd8f73e5bf8ee6f9ccedb0064486a7b418"},
+ {file = "torch-2.5.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:38c21ff1bd39f076d72ab06e3c88c2ea6874f2e6f235c9450816b6c8e7627094"},
+ {file = "torch-2.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:ce4baeba9804da5a346e210b3b70826f5811330c343e4fe1582200359ee77fe5"},
+ {file = "torch-2.5.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:03e53f577a96e4d41aca472da8faa40e55df89d2273664af390ce1f570e885bd"},
]
[package.dependencies]
@@ -3558,24 +3563,26 @@ filelock = "*"
fsspec = "*"
jinja2 = "*"
networkx = "*"
-nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nccl-cu12 = {version = "2.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-sympy = "*"
-triton = {version = "2.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
+nvidia-cublas-cu12 = {version = "12.4.5.8", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-cupti-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-nvrtc-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-runtime-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cudnn-cu12 = {version = "9.1.0.70", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cufft-cu12 = {version = "11.2.1.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-curand-cu12 = {version = "10.3.5.147", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusolver-cu12 = {version = "11.6.1.9", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusparse-cu12 = {version = "12.3.1.170", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nccl-cu12 = {version = "2.21.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvjitlink-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvtx-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+setuptools = {version = "*", markers = "python_version >= \"3.12\""}
+sympy = {version = "1.13.1", markers = "python_version >= \"3.9\""}
+triton = {version = "3.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""}
typing-extensions = ">=4.8.0"
[package.extras]
opt-einsum = ["opt-einsum (>=3.3)"]
-optree = ["optree (>=0.9.1)"]
+optree = ["optree (>=0.12.0)"]
[[package]]
name = "tqdm"
@@ -3599,17 +3606,16 @@ telegram = ["requests"]
[[package]]
name = "triton"
-version = "2.2.0"
+version = "3.1.0"
description = "A language and compiler for custom Deep Learning operations"
optional = false
python-versions = "*"
files = [
- {file = "triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5"},
- {file = "triton-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da58a152bddb62cafa9a857dd2bc1f886dbf9f9c90a2b5da82157cd2b34392b0"},
- {file = "triton-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af58716e721460a61886668b205963dc4d1e4ac20508cc3f623aef0d70283d5"},
- {file = "triton-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8fe46d3ab94a8103e291bd44c741cc294b91d1d81c1a2888254cbf7ff846dab"},
- {file = "triton-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ce26093e539d727e7cf6f6f0d932b1ab0574dc02567e684377630d86723ace"},
- {file = "triton-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:227cc6f357c5efcb357f3867ac2a8e7ecea2298cd4606a8ba1e931d1d5a947df"},
+ {file = "triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8"},
+ {file = "triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f34f6e7885d1bf0eaaf7ba875a5f0ce6f3c13ba98f9503651c1e6dc6757ed5c"},
+ {file = "triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8182f42fd8080a7d39d666814fa36c5e30cc00ea7eeeb1a2983dbb4c99a0fdc"},
+ {file = "triton-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dadaca7fc24de34e180271b5cf864c16755702e9f63a16f62df714a8099126a"},
+ {file = "triton-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aafa9a20cd0d9fee523cd4504aa7131807a864cd77dcf6efe7e981f18b8c6c11"},
]
[package.dependencies]
@@ -3617,8 +3623,8 @@ filelock = "*"
[package.extras]
build = ["cmake (>=3.20)", "lit"]
-tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
-tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
+tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"]
+tutorials = ["matplotlib", "pandas", "tabulate"]
[[package]]
name = "types-pyyaml"
@@ -4056,5 +4062,4 @@ torch = ["torch"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9, <4.0"
-
-content-hash = "04e8c5a6806fa7e3a34f322dd63ba242cef635929360f57d126082edd1d9987c"
+content-hash = "9c7a0522920a4f485ebce56c7f5b7da0674523933e1ca9f167fa26650423da2a"