config backward engine to GradComponent, allow disabling it especially in the demo process; track the steps in the text optimizer; add last query in the multihop retriever
liyin2015 committed Jan 10, 2025
1 parent e251d7c commit ee81bb7
Showing 16 changed files with 263 additions and 119 deletions.
7 changes: 5 additions & 2 deletions adalflow/adalflow/core/generator.py
@@ -377,6 +377,7 @@ def _pre_call(self, prompt_kwargs: Dict, model_kwargs: Dict) -> Dict[str, Any]:
model_kwargs=composed_model_kwargs,
model_type=self.model_type,
)
# printc(f"api_kwargs: {api_kwargs}", color="red")
return api_kwargs

def _model_client_call(self, api_kwargs: Dict, use_cache: bool = False) -> Any:
@@ -545,6 +546,7 @@ def forward(
self.model_kwargs, model_kwargs
),
}
# printc(f"input_args: {input_args}", color="red")

output = self.call(**input_args, id=id)
if not isinstance(output, GeneratorOutput):
@@ -673,7 +675,8 @@ def backward(

# 1.backward for text-gradients
if backward_engine:
log.debug(

printc(
f"Generator: Backward engine is set for the generator. {backward_engine}"
)
if response.backward_engine_disabled:
@@ -1062,7 +1065,7 @@ def _run_callbacks(

def call(
self,
prompt_kwargs: Optional[Dict] = {},  # the input needs to be passed to the prompt
prompt_kwargs: Optional[Dict] = {},  # supports both str and Parameter values
model_kwargs: Optional[Dict] = {},
use_cache: Optional[bool] = None,
id: Optional[str] = None,
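The comment change above notes that `prompt_kwargs` now accepts both plain strings and `Parameter` values. A minimal sketch of what that implies, with a stand-in `Parameter` class (the real one lives in `adalflow.optim.parameter`); the helper `normalize_prompt_kwargs` is hypothetical, not part of the commit:

```python
from typing import Any, Dict


class Parameter:
    """Stand-in for adalflow's Parameter; only the .data payload is modeled."""

    def __init__(self, data: Any):
        self.data = data


def normalize_prompt_kwargs(prompt_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    # Unwrap Parameter values so the prompt template only ever sees plain data.
    return {
        key: value.data if isinstance(value, Parameter) else value
        for key, value in prompt_kwargs.items()
    }


print(normalize_prompt_kwargs({
    "question": "What is 2 + 2?",              # plain str
    "few_shot_demos": Parameter("demo text"),  # Parameter value
}))
```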
41 changes: 22 additions & 19 deletions adalflow/adalflow/optim/grad_component.py
@@ -84,6 +84,9 @@ def set_backward_engine(
f"EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine. Got {type(backward_engine)}."
)

def disable_backward_engine(self):
self.backward_engine = None

def call(self, *args, **kwargs):
raise NotImplementedError("call method is not implemented")

@@ -347,25 +350,25 @@ def __init__(
)
self.backward_engine = backward_engine

def set_backward_engine(
self,
backward_engine: "BackwardEngine" = None,
model_client: "ModelClient" = None,
model_kwargs: Dict[str, object] = None,
):
from adalflow.core.generator import BackwardEngine

self.backward_engine = backward_engine
if not backward_engine:
log.info(
"EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs."
)
self.backward_engine = BackwardEngine(model_client, model_kwargs)
else:
if type(backward_engine) is not BackwardEngine:
raise TypeError(
f"EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine. Got {type(backward_engine)}."
)
# def set_backward_engine(
# self,
# backward_engine: "BackwardEngine" = None,
# model_client: "ModelClient" = None,
# model_kwargs: Dict[str, object] = None,
# ):
# from adalflow.core.generator import BackwardEngine

# self.backward_engine = backward_engine
# if not backward_engine:
# log.info(
# "EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs."
# )
# self.backward_engine = BackwardEngine(model_client, model_kwargs)
# else:
# if type(backward_engine) is not BackwardEngine:
# raise TypeError(
# f"EvalFnToTextLoss: backward_engine must be an instance of BackwardEngine. Got {type(backward_engine)}."
# )

@staticmethod
def _backward_through_one_predecessor(
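The new `disable_backward_engine` hook above clears the engine so that later backward passes propagate only scores. A self-contained sketch of the toggle under simplified names (the real classes carry far more state):

```python
from typing import Optional


class BackwardEngine:
    """Stand-in for adalflow's BackwardEngine (an LLM that writes feedback)."""

    def generate_feedback(self, output: str) -> str:
        return f"textual feedback for: {output}"


class GradComponentSketch:
    def __init__(self) -> None:
        self.backward_engine: Optional[BackwardEngine] = None

    def set_backward_engine(self, engine: BackwardEngine) -> None:
        if not isinstance(engine, BackwardEngine):
            raise TypeError(f"expected BackwardEngine, got {type(engine)}")
        self.backward_engine = engine

    def disable_backward_engine(self) -> None:
        # Mirrors the new method: subsequent backward calls skip text gradients.
        self.backward_engine = None

    def backward(self, output: str) -> None:
        if self.backward_engine:
            print(self.backward_engine.generate_feedback(output))
        else:
            print("backward engine disabled: propagating score only")


comp = GradComponentSketch()
comp.set_backward_engine(BackwardEngine())
comp.backward("draft answer")      # generates textual feedback
comp.disable_backward_engine()
comp.backward("draft answer")      # score-only propagation
```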
3 changes: 3 additions & 0 deletions adalflow/adalflow/optim/loss_component.py
@@ -41,6 +41,9 @@ def __call__(self, *args, **kwargs):
def set_backward_engine(self, backward_engine: "BackwardEngine", *args, **kwargs):
raise NotImplementedError("set_backward_engine method is not implemented")

def disable_backward_engine(self):
self.backward_engine = None

def forward(self, *args, **kwargs) -> "Parameter":
r"""Default just wraps the call method."""
raise NotImplementedError("forward method is not implemented")
3 changes: 3 additions & 0 deletions adalflow/adalflow/optim/parameter.py
@@ -53,6 +53,9 @@ class GradientContext(DataClass):
response_desc: str = field(
metadata={"desc": "The description of the response parameter"}
)
# input: Dict[str, Any] = field(
# metadata={"desc": "The input to the whole system"}, default=None
# )

# ground_truth: Any = field(
# metadata={"desc": "The ground truth of the response parameter"}, default=None
49 changes: 29 additions & 20 deletions adalflow/adalflow/optim/text_grad/backend_engine_prompt.py
@@ -79,12 +79,20 @@
# LLM: Answer questions by reading the context and reason the best answer.
# </TASK_PIPELINE>
# You are the feedback engine in an optimization system consisting of multiple components.
# You are the feedback engine to provide feedback for a target variable in a compound LLM system.

# The evaluation and feedback are backpropagated all the way to you, and you will assess the current component's inputs and output along with its feedback.
# A component can have multiple inputs, and you handle one that is enclosed in <TARGET_VARIABLE> or <VARIABLES> tags.
# You will provide intelligent and creative feedback so that the optimizer can optimize this variable to improve the objective enclosed in <OBJECTIVE_FUNCTION> tags.

FEEDBACK_ENGINE_TEMPLATE = r"""<START_OF_SYSTEM_PROMPT>
You are the feedback engine to provide feedback for a target variable in a compound LLM system.
You are a detective who excels at determining the root cause of a system error.
You start with an evaluation function that measures performance, and you receive the system input.
The system can be a compound system, potentially consisting of multiple components.
You will receive feedback from your direct successor, and your goal is to investigate your component's inputs and outputs to identify whether any of your input variables are causing the error.
The evaluation and feedback are backpropagated all the way to you, and you will assess the current component's inputs and output along with its feedback.
A component can have multiple inputs, and you handle one that is enclosed in <TARGET_VARIABLE> or <VARIABLES> tags.
You will provide intelligent and creative feedback so that the optimizer can optimize this variable to improve the objective enclosed in <OBJECTIVE_FUNCTION> tags.
Your target input variable is enclosed in <TARGET_VARIABLE> (representing one of the input variables that may or may not be causing the error).
Alternatively, it may be enclosed in <VARIABLES> tags (in which case you must pass feedback to all variables, indicating which ones cause the errors and which do not).
1. From <CONVERSATION></CONVERSATION> section, you can find how the variable is obtained and used.
2. As there might be multiple predecessors and multiple components, it is possible that the feedback/error is not directly related to the variable itself.
@@ -192,23 +200,23 @@
# Note: {{metadata}}
# {% endif %}"""

LOSS_CONVERSATION_TEMPLATE_STRING = r"""
The variable is passed to the eval function and compared with an expected value (y_gt or ground_truth).
# LOSS_CONVERSATION_TEMPLATE_STRING = r"""
# The variable is passed to the eval function and compared with an expected value (y_gt or ground_truth).

EVAL_FUNC: {{eval_fn_desc}}
# EVAL_FUNC: {{eval_fn_desc}}

INPUTS:
{% for key, (value, eval_type) in inputs.items() %}
({{ key }}) (role: {{ value.role_desc }}),
data: {{ value.prompt_data }},
input_to_eval_fn: {{ value.eval_input }},
data_type: {{ eval_type }}
{% endfor %}
# INPUTS:
# {% for key, (value, eval_type) in inputs.items() %}
# ({{ key }}) (role: {{ value.role_desc }}),
# data: {{ value.prompt_data }},
# input_to_eval_fn: {{ value.eval_input }},
# data_type: {{ eval_type }}
# {% endfor %}

OUTPUTS/SCORE: {{response_value}}
{% if metadata %}
Note: {{metadata}}
{% endif %}"""
# OUTPUTS/SCORE: {{response_value}}
# {% if metadata %}
# Note: {{metadata}}
# {% endif %}"""


### Variable to get feedback on, often it is pred in the loss component
@@ -223,11 +231,12 @@

### Loss/Score Information ###
LOSS_CONVERSATION_TEMPLATE_STRING = r"""
The variable is passed to the eval function and compared with a target/ground truth value.
The variable is passed to the eval function and compared with a target/ground truth value to get
its score with respect to a SYSTEM_QUESTION: {{system_question}}.
EVAL_FUNC: {{eval_fn_desc}}
INPUTS:
INPUTS to EVAL_FUNC:
{% for key, (value, eval_type) in inputs.items() %}
({{ key }}) (role: {{ value.role_desc }}),
data: {{ value.prompt_data }},
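The reworked `LOSS_CONVERSATION_TEMPLATE_STRING` now receives a `system_question`. A minimal render of that new field using plain Jinja2, with a simplified template and mock values (not the full template or the adalflow `Prompt` class):

```python
from jinja2 import Template

template = Template(
    "The variable is passed to the eval function and compared with a "
    "target/ground truth value to get\n"
    "its score with respect to a SYSTEM_QUESTION: {{ system_question }}.\n"
    "EVAL_FUNC: {{ eval_fn_desc }}\n"
    "OUTPUTS/SCORE: {{ response_value }}"
)

print(template.render(
    system_question="What is the capital of France?",      # mock system input
    eval_fn_desc="exact match against the ground truth",   # mock description
    response_value="0.0",
))
```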
7 changes: 7 additions & 0 deletions adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py
@@ -103,6 +103,7 @@ def forward(
metadata: Dict[str, str] = None, # additional notes on the input kwargs
id: str = None,
gt: object = None,
input: Dict[str, object] = None,
) -> Parameter:
r"""
Args:
@@ -158,6 +159,7 @@
kwargs=kwargs,
metadata=metadata,
ground_truth=gt,
input=input,
)
)
return eval_param
@@ -192,6 +194,7 @@ def _backward_through_one_predecessor(
ground_truth: object = None,
is_intermediate_node: bool = False, # if the node is an intermediate node in the backpropagation chain
metadata: Dict[str, str] = None,
input: Dict[str, object] = None, # system input
):
if not pred.requires_opt:
if response.score is not None:
@@ -231,6 +234,7 @@
conversation_str = Prompt(
LOSS_CONVERSATION_TEMPLATE_STRING,
prompt_kwargs={
"system_question": input,
"inputs": inputs,
"eval_fn_desc": eval_fn_desc,
"response_value": response.get_prompt_data(),
@@ -304,6 +308,7 @@ def _backward_through_one_predecessor(
input_output=conversation_str,
response_desc=response.role_desc,
variable_desc=pred.role_desc,
input=input,
# ground_truth=ground_truth,
)
)
@@ -330,6 +335,7 @@ def backward(
"BackwardEngine"
] = None, # only needed for text prompt optimization
metadata: Dict[str, str] = None,
input: Dict[str, object] = None,
):
r"""Ensure to set backward_engine for the text prompt optimization. It can be None if you
are only doing demo optimization and it will not have gradients but simply backpropagate the score.
@@ -362,6 +368,7 @@
ground_truth=ground_truth,
is_intermediate_node=is_intermediate_node,
metadata=metadata,
input=input,
)
else: # recursively disable backward for all children
for pred in children_params:
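The diff above threads a new `input` kwarg from `forward` through `backward` down to `_backward_through_one_predecessor`, where it surfaces as the SYSTEM_QUESTION. A self-contained sketch of that flow; all names here are simplified stand-ins for the adalflow classes:

```python
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class LossParamSketch:
    score: float
    input: Optional[Dict[str, object]] = None  # the new field set in forward()

    def backward(self) -> str:
        # Mirrors _backward_through_one_predecessor: the stored system input
        # is rendered into the conversation as the SYSTEM_QUESTION.
        return (
            f"its score with respect to a SYSTEM_QUESTION: {self.input}.\n"
            f"OUTPUTS/SCORE: {self.score}"
        )


loss = LossParamSketch(score=0.0, input={"question": "Who wrote Hamlet?"})
print(loss.backward())
```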
13 changes: 12 additions & 1 deletion adalflow/adalflow/optim/text_grad/tgd_optimizer.py
@@ -50,6 +50,7 @@ class HistoryPrompt(DataClass):
{{optimizer_system_prompt}}
<END_OF_SYSTEM_PROMPT>
<START_OF_USER_MESSAGE>
It has been {{steps}} steps since your last successful improvement.
{# Variable and peers info #}
<START_OF_VARIABLE_AND_PEERS_INFO>
{{variable_and_peers_info}}
@@ -79,7 +80,7 @@ class HistoryPrompt(DataClass):
IMPORTANT: Your goal is to generate a new variable that scores higher than all past iterations.
{# Momentum #}
{% if failed_proposals %}
Here are the past failed proposals:
Here are the most recent failed proposals:
{% for failed_proposal in failed_proposals %}
{{loop.index}}. {{failed_proposal}}
{% endfor %}
@@ -183,6 +184,7 @@ class HistoryPrompt(DataClass):
2. Observe past performance patterns (when available) to retain good qualities in the variable.
3. **System Awareness**: When other system variables are given, ensure you understand how this variable works in the whole system.
You have a choice to not update a variable if it is not responsible for the error. Just keep the `update` field as `False`.
You MUST NOT update a variable when there is no clear error indicated in a multi-component system.
4. **Peer Awareness**: This variable works together with Peer variables, ensure you are aware of their roles and constraints.
5. Be Creative. If adding new elements, be concise.
@@ -339,6 +341,7 @@ def __init__(
num_gradient_memory: int = 0, # TODO: gradient memory and momentum, for now it is not useful
max_past_history: int = 3,
max_failed_proposals: int = 2,
steps_from_last_improvement: int = 0,
):
from adalflow.core.generator import Generator
from adalflow.core import Prompt
@@ -385,6 +388,7 @@

self.max_past_history = max_past_history
self.max_failed_proposals = max_failed_proposals
self.steps_from_last_improvement = steps_from_last_improvement

# initialize the past history for each parameter
for param in self.params:
@@ -405,6 +409,12 @@ def constraint_text(self):
]
return "\n".join(constraints_ordered)

def increment_steps_from_last_improvement(self):
self.steps_from_last_improvement += 1

def reset_steps_from_last_improvement(self):
self.steps_from_last_improvement = 0

def add_score_to_params(self, val_score: float):
for param in self.params:
self.add_score_to_current_param(param.id, param, val_score)
@@ -532,6 +542,7 @@ def _get_user_prompt_kwargs(self, param: Parameter) -> Dict[str, str]:
else None
),
"system_variables": system_params,
"steps": self.steps_from_last_improvement,
}

return user_prompt_kwargs
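The optimizer now carries a `steps_from_last_improvement` counter that is exposed to the proposal prompt as `steps`. A self-contained sketch of the counter API and how it lands in the user prompt kwargs (simplified; the real `_get_user_prompt_kwargs` assembles many more keys):

```python
class TGDOptimizerSketch:
    def __init__(self, steps_from_last_improvement: int = 0):
        self.steps_from_last_improvement = steps_from_last_improvement

    def increment_steps_from_last_improvement(self) -> None:
        self.steps_from_last_improvement += 1

    def reset_steps_from_last_improvement(self) -> None:
        self.steps_from_last_improvement = 0

    def get_user_prompt_kwargs(self) -> dict:
        # Mirrors _get_user_prompt_kwargs: the counter is exposed as "steps".
        return {"steps": self.steps_from_last_improvement}


opt = TGDOptimizerSketch()
opt.increment_steps_from_last_improvement()
opt.increment_steps_from_last_improvement()
print(opt.get_user_prompt_kwargs())  # {'steps': 2}
```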
23 changes: 22 additions & 1 deletion adalflow/adalflow/optim/trainer/adal.py
@@ -175,7 +175,7 @@ def configure_optimizers(self, *args, **kwargs) -> List[Optimizer]:
return self._demo_optimizers + self._text_optimizers

def configure_backward_engine(self, *args, **kwargs):
r"""Configure a backward engine for all generators in the task for bootstrapping examples."""
r"""Configure a backward engine for all GradComponent in the task for bootstrapping examples."""
# check if backward engine is already configured
if self.backward_engine:
log.warning("Backward engine is already configured.")
@@ -191,6 +191,10 @@ def configure_backward_engine(self, *args, **kwargs):
backward_pass_setup=kwargs.get("backward_pass_setup", None),
)

def disable_backward_engine(self):
r"""Disable the backward engine for all GradComponent in the task."""
self.disable_backward_engine_helper()

# def configure_backward_engine(self, *args, **kwargs):
# raise NotImplementedError("configure_backward_engine method is not implemented")

@@ -591,6 +595,23 @@ def configure_teacher_generator_helper(
generator.set_teacher_generator(teacher_generator)
print("Teacher generator configured.")

def disable_backward_engine_helper(self):
r"""Disable the backward engine for all generators in the task."""
all_grads = self._find_all_grad_components()
for _, grad in all_grads:
if hasattr(grad, "disable_backward_engine") and callable(
getattr(grad, "disable_backward_engine", None)
):
grad.disable_backward_engine()
print("Backward engine disabled for GradComponents")

if not self.loss_fn:
raise ValueError("Loss function is not configured.")

# configure it for loss_fn
if self.loss_fn:
self.loss_fn.disable_backward_engine()

def configure_backward_engine_helper(
self,
model_client: "ModelClient",
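`disable_backward_engine_helper` duck-types over every grad component, calling the hook only where it exists. A minimal sketch of that pattern; the two toy classes are hypothetical, not adalflow components:

```python
class WithHook:
    def __init__(self) -> None:
        self.backward_engine = "engine"

    def disable_backward_engine(self) -> None:
        self.backward_engine = None


class WithoutHook:
    pass


components = [("generator", WithHook()), ("retriever", WithoutHook())]
for name, comp in components:
    hook = getattr(comp, "disable_backward_engine", None)
    if callable(hook):
        hook()  # only components that define the hook are touched
        print(f"backward engine disabled for {name}")
```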
12 changes: 11 additions & 1 deletion adalflow/adalflow/optim/trainer/trainer.py
@@ -623,6 +623,7 @@ def run_text_optimizers(starting_step: int, trainer_results: TrainerResult):
def run_demo_optimizers(starting_step: int, trainer_results: TrainerResult):
if len(self.demo_optimizers) > 0:
self.adaltask.configure_teacher_generator()
self.adaltask.disable_backward_engine()  # skip text-gradient generation during demo optimization
self._fit_demos_random(
train_loader,
train_dataset,
@@ -1086,6 +1087,14 @@ def _revert_text_optimizers(self):
for text_optimizer in self.text_optimizers:
text_optimizer.revert()

def _increment_step_from_last_improvement_text_optimizers(self):
for text_optimizer in self.text_optimizers:
text_optimizer.increment_steps_from_last_improvement()

def _reset_steps_from_last_improvement_text_optimizers(self):
for text_optimizer in self.text_optimizers:
text_optimizer.reset_steps_from_last_improvement()

def _check_optimizer_proposal(self):
r"""Return True if all optimizers have proposed a new prompt"""
for text_optimizer in self.text_optimizers:
@@ -2094,6 +2103,7 @@ def _text_grad_constraint_propose_step(
)
all_samples, all_losses, all_y_preds = [], [], []
val_score_increased = True
self._reset_steps_from_last_improvement_text_optimizers()
break
else:
print(f"Optimizer revert: {val_score} <= {last_val_score}")
@@ -2105,7 +2115,6 @@
self._demo_optimizers_revert()

continue

if not val_score_increased:
print("No proposal can improve the subset and full set, and val set")
self._zero_grad_text_optimizers()
@@ -2119,6 +2128,7 @@
total_steps,
attempted_val_score=val_score,
)
self._increment_step_from_last_improvement_text_optimizers()

print(f"Saving checkpoint to {self.ckpt_file}")
trainer_results.effective_measure = self._effective_measure
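The trainer ties the counter to the propose/validate loop: reset on an accepted proposal, increment when a step ends without improvement. A simplified, self-contained sketch of that policy with mock validation scores (the real loop also handles subset checks, demo optimizers, and checkpointing):

```python
steps_from_last_improvement = 0
last_val_score = 0.50

for val_score in [0.48, 0.52, 0.51]:  # mock per-step validation scores
    if val_score > last_val_score:
        # corresponds to _reset_steps_from_last_improvement_text_optimizers()
        steps_from_last_improvement = 0
        last_val_score = val_score
        print(f"accepted: val={val_score}")
    else:
        # corresponds to _increment_step_from_last_improvement_text_optimizers()
        steps_from_last_improvement += 1
        print(f"reverted: val={val_score} <= {last_val_score}")

print("steps since last improvement:", steps_from_last_improvement)
```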