
v1 agent rag debug and loss works
liyin2015 committed Dec 20, 2024
1 parent 3f4dd33 commit 5886648
Showing 5 changed files with 223 additions and 13 deletions.
33 changes: 26 additions & 7 deletions adalflow/adalflow/components/agent/react.py
@@ -359,7 +359,11 @@ def _execute_action(self, action_step: StepOutput) -> Optional[StepOutput]:
return action_step

def _run_one_step(
self, step: int, prompt_kwargs: Dict, model_kwargs: Dict
self,
step: int,
prompt_kwargs: Dict,
model_kwargs: Dict,
id: Optional[str] = None,
) -> Union[StepOutput, Parameter]:
"""Run one step of the agent. Plan and execute the action for the step.
Need to deal with both train and eval mode on the self.planner.
@@ -373,7 +377,7 @@ def _run_one_step(
)

response: Union[GeneratorOutput, Parameter] = self.planner(
prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs
prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs, id=id
)

# create a new step output
@@ -430,21 +434,35 @@ def map_fn2(x: Parameter) -> GeneratorOutput:
f"Error: {x} does not have full_response attribute."
)

def map_parameter_to_step_output(x: Parameter) -> StepOutput:
if x and x.data:
return x.data
else:
raise ValueError(f"Error: {x} does not have data attribute.")

# Bind `step_output` to a specific value using partial
preinitialized_map_fn = partial(map_fn, step_output=step_output)
# execute the function and get the output
response.add_successor_map_fn(
successor=self.generator_output_to_step_output, map_fn=map_fn2
)
output = self.generator_output_to_step_output.forward(
response, step_output, step, self._execute_action
)
# add the output to the step history
output.add_successor_map_fn(
successor=self.append_step_history, map_fn=map_parameter_to_step_output
)

# connect response to append_step_history
# # connect response to append_step_history
response.add_successor_map_fn(
successor=self.append_step_history, map_fn=preinitialized_map_fn
)

# call self.append_step_history with the response
# # call self.append_step_history with the response
# self.step_history = self.append_step_history.forward(
# output, self.step_history
# )
self.step_history = self.append_step_history.forward(
response, self.step_history
)
@@ -454,7 +472,6 @@ def map_fn2(x: Parameter) -> GeneratorOutput:
)
# printc(f"step_history 2: {self.step_history}", color="yellow")
# convert step history back to data
# self.step_history = self.step_history.data
return output

else:
@@ -562,6 +579,7 @@ def bicall(
input: str,
prompt_kwargs: Optional[Dict] = {},
model_kwargs: Optional[Dict] = {},
id: Optional[str] = None,
) -> Any:
r"""prompt_kwargs: additional prompt kwargs to either replace or add to the preset prompt kwargs."""
prompt_kwargs = {**prompt_kwargs, "input_str": input}
@@ -570,7 +588,7 @@
for i in range(self.max_steps):
step = i + 1
try:
step_output = self._run_one_step(step, prompt_kwargs, model_kwargs)
step_output = self._run_one_step(step, prompt_kwargs, model_kwargs, id)
# if (
# self.step_history[-1].function
# and self.step_history[-1].function.name == "finish"
@@ -587,7 +605,8 @@
# printc(f"answer:\n {answer}", color="green")
# log.info(f"step_history: {self.step_history}")
# self.reset()
return step_output
self.step_history.draw_graph()
return self.step_history

def _extra_repr(self) -> str:
s = f"max_steps={self.max_steps}, add_llm_as_fallback={self.add_llm_as_fallback}, "
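A note on the successor map functions used throughout this change: add_successor_map_fn appears to register, per downstream component, how a Parameter's payload should be unpacked before that successor consumes it. Below is a minimal, self-contained sketch of that pattern; the Node class is a toy stand-in, not the real adalflow Parameter API.

from typing import Any, Callable, Dict


class Node:
    """Toy stand-in for adalflow's Parameter: data plus per-successor map fns."""

    def __init__(self, name: str, data: Any):
        self.name = name
        self.data = data
        self._successor_map_fns: Dict[int, Callable[["Node"], Any]] = {}

    def add_successor_map_fn(self, successor: Any, map_fn: Callable[["Node"], Any]) -> None:
        # Record how this node's payload should be unpacked for one successor.
        self._successor_map_fns[id(successor)] = map_fn

    def map_to_successor(self, successor: Any) -> Any:
        # A successor pulls the view registered for it, falling back to the raw data.
        fn = self._successor_map_fns.get(id(successor))
        return fn(self) if fn else self.data


# Usage: one upstream response, two successors that each need a different view.
response = Node("planner_response", {"step": 1, "observation": "Paris"})
step_builder, history = object(), object()
response.add_successor_map_fn(step_builder, lambda n: n.data)            # full payload
response.add_successor_map_fn(history, lambda n: n.data["observation"])  # just the text
print(response.map_to_successor(history))  # -> Paris

This is why the diff registers map_fn2 for generator_output_to_step_output and a different map fn for append_step_history: each successor sees only the slice of the response it needs.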
13 changes: 13 additions & 0 deletions adalflow/adalflow/optim/grad_component.py
@@ -151,6 +151,7 @@ def backward(self, *, response: "Parameter", id: str = None, **kwargs):
Subclass should implement this method if you need additional backward logic.
"""

log.info(f"GradComponent backward: {response.name}")
children_params = response.predecessors

@@ -169,3 +170,15 @@ def backward(self, *, response: "Parameter", id: str = None, **kwargs):
pred.add_score_to_trace(
trace_id=id, score=response._score, is_teacher=self.teacher_mode
)

# pass the current gradient to pred
# pred.add_gradient(
# gradient=Parameter(
# name=f"gradient",
# data=response.get_gradient_and_context_text(
# skip_correct_sample=True
# ),
# param_type=ParameterType.GRADIENT,
# from_response_id=response.id,
# )
# )
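The backward hook above walks response.predecessors and copies the response-level score onto each predecessor's trace. A hedged sketch of that idea, using a hypothetical Param dataclass rather than adalflow's real Parameter:

from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Param:
    name: str
    predecessors: List["Param"] = field(default_factory=list)
    score: float = 0.0
    traces: Dict[str, float] = field(default_factory=dict)

    def add_score_to_trace(self, trace_id: str, score: float) -> None:
        self.traces[trace_id] = score


def backward(response: Param, trace_id: str) -> None:
    # Push the response-level score onto every predecessor's trace,
    # mirroring the loop in GradComponent.backward above.
    for pred in response.predecessors:
        pred.add_score_to_trace(trace_id=trace_id, score=response.score)


prompt = Param("system_prompt")
resp = Param("response", predecessors=[prompt], score=0.4)
backward(resp, trace_id="sample-42")
print(prompt.traces)  # {'sample-42': 0.4}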
24 changes: 18 additions & 6 deletions benchmarks/hotpot_qa/adal_exp/build_multi_hop_rag.py
@@ -651,8 +651,20 @@ def generator_as_tool(input: str) -> str:
model_kwargs=model_kwargs,
)

def call(self, input: str, id: str = None) -> str:
return self.agent(input=input)
def forward(self, *args, **kwargs) -> Parameter:
return self.bicall(*args, **kwargs)

def call(self, *args, **kwargs):
return self.bicall(*args, **kwargs)

def bicall(self, input: str, id: str = None) -> str:
out = self.agent(input=input, id=id)
if isinstance(out, adal.Parameter):
return out
return out[-1].observation
# if isinstance(out, adal.Parameter):
# return out.data[-1].observation
# return out[-1].observation


def test_multi_hop_retriever():
@@ -722,10 +734,10 @@ def test_agent_rag():
task.train()

output = task.forward(input=question)
print(output)
# output.draw_graph()
# output.draw_output_subgraph()
# output.draw_component_subgraph()
# print(output)
output.draw_graph()
output.draw_output_subgraph()
output.draw_component_subgraph()


def test_multi_hop_retriever2():
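The forward/call/bicall split above lets one body of logic serve both training, where the agent returns an adal.Parameter that stays in the graph, and inference, where it returns plain step outputs. A minimal sketch of the dispatch pattern, with toy Parameter and Step classes standing in for the adalflow types:

from typing import Any, List, Union


class Parameter:
    """Toy stand-in for adal.Parameter (a training-graph node)."""

    def __init__(self, data: Any):
        self.data = data


class Step:
    def __init__(self, observation: str):
        self.observation = observation


class BicallComponent:
    """One body of logic, two entry points: forward() for train, call() for eval."""

    def __init__(self, training: bool = False):
        self.training = training

    def forward(self, *args, **kwargs) -> Parameter:
        return self.bicall(*args, **kwargs)

    def call(self, *args, **kwargs) -> str:
        return self.bicall(*args, **kwargs)

    def bicall(self, input: str, id: str = None) -> Union[Parameter, str]:
        steps: List[Step] = [Step(f"answer to {input!r}")]  # pretend agent run
        if self.training:
            return Parameter(steps)       # train mode: keep the graph node
        return steps[-1].observation      # eval mode: return the final answer text


print(BicallComponent().call(input="who wrote Hamlet?", id="q1"))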
158 changes: 158 additions & 0 deletions benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
@@ -0,0 +1,158 @@
from typing import Any, Callable, Dict, Tuple

import adalflow as adal
from adalflow.eval.answer_match_acc import AnswerMatchAcc
from adalflow.datasets.types import HotPotQAData

from benchmarks.hotpot_qa._adal_train import load_datasets
from benchmarks.hotpot_qa.adal_exp.build_multi_hop_rag import AgenticRAG
from use_cases.config import gpt_3_model, gpt_4o_model


# TODO: look more into the loss function
# TODO: test LLM judge too.
class AgenticRAGAdal(adal.AdalComponent):
def __init__(
self,
model_client: adal.ModelClient,
model_kwargs: Dict,
backward_engine_model_config: Dict | None = None,
teacher_model_config: Dict | None = None,
text_optimizer_model_config: Dict | None = None,
):
task = AgenticRAG(
model_client=model_client,
model_kwargs=model_kwargs,
)
eval_fn = AnswerMatchAcc(type="fuzzy_match").compute_single_item
loss_fn = adal.EvalFnToTextLoss(
eval_fn=eval_fn, eval_fn_desc="fuzzy_match: 1 if str(y) in str(y_gt) else 0"
)
super().__init__(
task=task,
eval_fn=eval_fn,
loss_fn=loss_fn,
backward_engine_model_config=backward_engine_model_config,
teacher_model_config=teacher_model_config,
text_optimizer_model_config=text_optimizer_model_config,
)

# tell the trainer how to call the task
def prepare_task(self, sample: HotPotQAData) -> Tuple[Callable[..., Any], Dict]:
if self.task.training:
return self.task.forward, {"input": sample.question, "id": sample.id}
else:
return self.task.call, {"input": sample.question, "id": sample.id}

# TODO: use two map fns to make the code even simpler

# eval mode: get the generator output, directly engage with the eval_fn
def prepare_eval(self, sample: HotPotQAData, y_pred: Any) -> float:
# y_label = ""
# if y_pred and y_pred.data and y_pred.data.answer:
# y_label = y_pred.data.answer
return self.eval_fn, {"y": y_pred, "y_gt": sample.answer}

# train mode: get the loss and get the data from the full_response
def prepare_loss(self, sample: HotPotQAData, pred: adal.Parameter):
# prepare gt parameter
y_gt = adal.Parameter(
name="y_gt",
data=sample.answer,
eval_input=sample.answer,
requires_opt=False,
)

# pred's full_response is the output of the task pipeline which is GeneratorOutput
print(type(pred.data))
pred.eval_input = (
pred.data[-1].observation if pred.data and pred.data[-1] else ""
)
return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}}


# Note: diagnose is quite helpful; it lets you quickly check whether the eval function is the right metric.
# I checked the fuzzy-match eval and found that some "yes"/"Yes" pairs were not matched; after converting
# both strings to lowercase, performance went up from 0.15 to 0.4.
def train_diagnose(
model_client: adal.ModelClient,
model_kwargs: Dict,
) -> Dict:

trainset, valset, testset = load_datasets()

adal_component = AgenticRAGAdal(
model_client,
model_kwargs,
backward_engine_model_config=gpt_4o_model,
teacher_model_config=gpt_3_model,
text_optimizer_model_config=gpt_3_model,
)
trainer = adal.Trainer(adaltask=adal_component)
trainer.diagnose(dataset=trainset, split="train")
# trainer.diagnose(dataset=valset, split="val")
# trainer.diagnose(dataset=testset, split="test")


def train(
train_batch_size=4, # a larger batch size is not much more effective, probably because of the LLM's lost-in-the-middle problem
raw_shots: int = 0,
bootstrap_shots: int = 4,
max_steps=1,
num_workers=4,
strategy="constrained",
optimization_order="sequential",
debug=False,
resume_from_ckpt=None,
exclude_input_fields_from_bootstrap_demos=True,
):
adal_component = AgenticRAGAdal(
**gpt_3_model,
teacher_model_config=gpt_3_model,
text_optimizer_model_config=gpt_4o_model, # gpt-3.5 is not strong enough to be a good optimizer; it struggles with long context
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
trainer = adal.Trainer(
train_batch_size=train_batch_size,
adaltask=adal_component,
strategy=strategy,
max_steps=max_steps,
num_workers=num_workers,
raw_shots=raw_shots,
bootstrap_shots=bootstrap_shots,
debug=debug,
weighted_sampling=True,
optimization_order=optimization_order,
exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,
sequential_order=["text", "demo"],
)
print(trainer)

train_dataset, val_dataset, test_dataset = load_datasets()
trainer.fit(
train_dataset=train_dataset,
val_dataset=val_dataset,
test_dataset=test_dataset,
resume_from_ckpt=resume_from_ckpt,
)


if __name__ == "__main__":
from use_cases.config import gpt_3_model

log = adal.get_logger(level="DEBUG", enable_console=False)

adal.setup_env()

# task = MultiHopRAGAdal(**gpt_3_model)
# print(task)

# train_diagnose(**gpt_3_model)

# train: 0.15 before the evaluator converted to lower and 0.4 after the conversion
train(
debug=True,
max_steps=12,
# resume_from_ckpt="/Users/liyin/.adalflow/ckpt/ValinaRAGAdal/random_max_steps_12_7c091_run_1.json",
)
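For orientation, the three prepare_* hooks defined above are what the trainer invokes per sample. A simplified, hypothetical evaluation loop (not adal.Trainer's actual code) showing how prepare_task and prepare_eval fit together:

def eval_loop(adal_component, dataset):
    # Simplified view of a trainer's evaluation pass over the prepare_* hooks.
    total = 0.0
    for sample in dataset:
        task_fn, task_kwargs = adal_component.prepare_task(sample)
        y_pred = task_fn(**task_kwargs)                 # run the task pipeline
        eval_fn, eval_kwargs = adal_component.prepare_eval(sample, y_pred)
        total += eval_fn(**eval_kwargs)                 # score the prediction
    return total / max(len(dataset), 1)

In train mode, prepare_loss plays the analogous role: it wraps the prediction and the ground truth as Parameters so EvalFnToTextLoss can score them inside the graph.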
8 changes: 8 additions & 0 deletions tutorials/react_note.py
@@ -68,6 +68,14 @@ def test_react_agent(model_client: ModelClient, model_kwargs: dict):
print("")


"""
To have an agent.
input, prompt, template, step_history -> generator
-> stepoutput -> step_history -> generator -> stepoutput -> step_history
-> generator -> stepoutput -> step_history -> generator -> stepoutput -> step_history
"""


def test_react_agent_train(model_client: ModelClient, model_kwargs: dict):
tools = [multiply, add, divide]
queries = [
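The docstring added above sketches the ReAct cycle: the step history feeds the generator, whose step output is appended back onto the history before the next step. A toy rendering of that control flow, with a plain string standing in for the LLM-backed generator:

def react_loop(question: str, max_steps: int = 3) -> list:
    step_history = []
    for step in range(1, max_steps + 1):
        # generator: plan the next action from the input plus the history so far
        step_output = f"step {step}: act on {question!r} given {len(step_history)} prior steps"
        step_history.append(step_output)  # the history grows each iteration
        if step == max_steps:             # stand-in for the agent's 'finish' action
            break
    return step_history


for s in react_loop("What is 15 * 7 + 3?"):
    print(s)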
