
v1 agent rag debug and loss works
liyin2015 committed Dec 20, 2024
1 parent 3f4dd33 commit 5886648
Showing 5 changed files with 223 additions and 13 deletions.
33 changes: 26 additions & 7 deletions adalflow/adalflow/components/agent/react.py
@@ -359,7 +359,11 @@ def _execute_action(self, action_step: StepOutput) -> Optional[StepOutput]:
return action_step

def _run_one_step(
self, step: int, prompt_kwargs: Dict, model_kwargs: Dict
self,
step: int,
prompt_kwargs: Dict,
model_kwargs: Dict,
id: Optional[str] = None,
) -> Union[StepOutput, Parameter]:
"""Run one step of the agent. Plan and execute the action for the step.
Need to deal with both train and eval mode on the self.planner.
@@ -373,7 +377,7 @@ def _run_one_step(
)

response: Union[GeneratorOutput, Parameter] = self.planner(
prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs
prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs, id=id
)

# create a new step output
@@ -430,21 +434,35 @@ def map_fn2(x: Parameter) -> GeneratorOutput:
f"Error: {x} does not have full_response attribute."
)

def map_parameter_to_step_output(x: Parameter) -> StepOutput:
if x and x.data:
return x.data
else:
raise ValueError(f"Error: {x} does not have data attribute.")

# Bind `step_output` to a specific value using partial
preinitialized_map_fn = partial(map_fn, step_output=step_output)
# execute the function and get the output
response.add_successor_map_fn(
successor=self.generator_output_to_step_output, map_fn=map_fn2
)
output = self.generator_output_to_step_output.forward(
response, step_output, step, self._execute_action
)
# add the output to the step history
output.add_successor_map_fn(
successor=self.append_step_history, map_fn=map_parameter_to_step_output
)

# connect response to append_step_history
# # connect response to append_step_history
response.add_successor_map_fn(
successor=self.append_step_history, map_fn=preinitialized_map_fn
)

# call self.append_step_history with the response
# # call self.append_step_history with the response
# self.step_history = self.append_step_history.forward(
# output, self.step_history
# )
self.step_history = self.append_step_history.forward(
response, self.step_history
)
@@ -454,7 +472,6 @@ def map_fn2(x: Parameter) -> GeneratorOutput:
)
# printc(f"step_history 2: {self.step_history}", color="yellow")
# convert step history back to data
# self.step_history = self.step_history.data
return output

else:
@@ -562,6 +579,7 @@ def bicall(
input: str,
prompt_kwargs: Optional[Dict] = {},
model_kwargs: Optional[Dict] = {},
id: Optional[str] = None,
) -> Any:
r"""prompt_kwargs: additional prompt kwargs to either replace or add to the preset prompt kwargs."""
prompt_kwargs = {**prompt_kwargs, "input_str": input}
@@ -570,7 +588,7 @@
for i in range(self.max_steps):
step = i + 1
try:
step_output = self._run_one_step(step, prompt_kwargs, model_kwargs)
step_output = self._run_one_step(step, prompt_kwargs, model_kwargs, id)
# if (
# self.step_history[-1].function
# and self.step_history[-1].function.name == "finish"
@@ -587,7 +605,8 @@
# printc(f"answer:\n {answer}", color="green")
# log.info(f"step_history: {self.step_history}")
# self.reset()
return step_output
self.step_history.draw_graph()
return self.step_history

def _extra_repr(self) -> str:
s = f"max_steps={self.max_steps}, add_llm_as_fallback={self.add_llm_as_fallback}, "
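A note on the successor map functions used throughout this change: add_successor_map_fn appears to register, per downstream component, how a Parameter's payload should be unpacked before that successor consumes it. Below is a minimal, self-contained sketch of that pattern; the Node class is a toy stand-in, not the real adalflow Parameter API.

from typing import Any, Callable, Dict


class Node:
    """Toy stand-in for adalflow's Parameter: data plus per-successor map fns."""

    def __init__(self, name: str, data: Any):
        self.name = name
        self.data = data
        self._successor_map_fns: Dict[int, Callable[["Node"], Any]] = {}

    def add_successor_map_fn(self, successor: Any, map_fn: Callable[["Node"], Any]) -> None:
        # Record how this node's payload should be unpacked for one successor.
        self._successor_map_fns[id(successor)] = map_fn

    def map_to_successor(self, successor: Any) -> Any:
        # A successor pulls the view registered for it, falling back to the raw data.
        fn = self._successor_map_fns.get(id(successor))
        return fn(self) if fn else self.data


# Usage: one upstream response, two successors that each need a different view.
response = Node("planner_response", {"step": 1, "observation": "Paris"})
step_builder, history = object(), object()
response.add_successor_map_fn(step_builder, lambda n: n.data)            # full payload
response.add_successor_map_fn(history, lambda n: n.data["observation"])  # just the text
print(response.map_to_successor(history))  # -> Paris

This is why the diff registers map_fn2 for generator_output_to_step_output and a different map fn for append_step_history: each successor sees only the slice of the response it needs.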
13 changes: 13 additions & 0 deletions adalflow/adalflow/optim/grad_component.py
@@ -151,6 +151,7 @@ def backward(self, *, response: "Parameter", id: str = None, **kwargs):
Subclass should implement this method if you need additional backward logic.
"""

log.info(f"GradComponent backward: {response.name}")
children_params = response.predecessors

@@ -169,3 +170,15 @@ def backward(self, *, response: "Parameter", id: str = None, **kwargs):
pred.add_score_to_trace(
trace_id=id, score=response._score, is_teacher=self.teacher_mode
)

# pass the current gradient to pred
# pred.add_gradient(
# gradient=Parameter(
# name=f"gradient",
# data=response.get_gradient_and_context_text(
# skip_correct_sample=True
# ),
# param_type=ParameterType.GRADIENT,
# from_response_id=response.id,
# )
# )
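The backward hook above walks response.predecessors and copies the response-level score onto each predecessor's trace. A hedged sketch of that idea, using a hypothetical Param dataclass rather than adalflow's real Parameter:

from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Param:
    name: str
    predecessors: List["Param"] = field(default_factory=list)
    score: float = 0.0
    traces: Dict[str, float] = field(default_factory=dict)

    def add_score_to_trace(self, trace_id: str, score: float) -> None:
        self.traces[trace_id] = score


def backward(response: Param, trace_id: str) -> None:
    # Push the response-level score onto every predecessor's trace,
    # mirroring the loop in GradComponent.backward above.
    for pred in response.predecessors:
        pred.add_score_to_trace(trace_id=trace_id, score=response.score)


prompt = Param("system_prompt")
resp = Param("response", predecessors=[prompt], score=0.4)
backward(resp, trace_id="sample-42")
print(prompt.traces)  # {'sample-42': 0.4}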
24 changes: 18 additions & 6 deletions benchmarks/hotpot_qa/adal_exp/build_multi_hop_rag.py
@@ -651,8 +651,20 @@ def generator_as_tool(input: str) -> str:
model_kwargs=model_kwargs,
)

def call(self, input: str, id: str = None) -> str:
return self.agent(input=input)
def forward(self, *args, **kwargs) -> Parameter:
return self.bicall(*args, **kwargs)

def call(self, *args, **kwargs):
return self.bicall(*args, **kwargs)

def bicall(self, input: str, id: str = None) -> str:
out = self.agent(input=input, id=id)
if isinstance(out, adal.Parameter):
return out
return out[-1].observation
# if isinstance(out, adal.Parameter):
# return out.data[-1].observation
# return out[-1].observation


def test_multi_hop_retriever():
@@ -722,10 +734,10 @@ def test_agent_rag():
task.train()

output = task.forward(input=question)
print(output)
# output.draw_graph()
# output.draw_output_subgraph()
# output.draw_component_subgraph()
# print(output)
output.draw_graph()
output.draw_output_subgraph()
output.draw_component_subgraph()


def test_multi_hop_retriever2():
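The forward/call/bicall split above lets one body of logic serve both training, where the agent returns an adal.Parameter that stays in the graph, and inference, where it returns plain step outputs. A minimal sketch of the dispatch pattern, with toy Parameter and Step classes standing in for the adalflow types:

from typing import Any, List, Union


class Parameter:
    """Toy stand-in for adal.Parameter (a training-graph node)."""

    def __init__(self, data: Any):
        self.data = data


class Step:
    def __init__(self, observation: str):
        self.observation = observation


class BicallComponent:
    """One body of logic, two entry points: forward() for train, call() for eval."""

    def __init__(self, training: bool = False):
        self.training = training

    def forward(self, *args, **kwargs) -> Parameter:
        return self.bicall(*args, **kwargs)

    def call(self, *args, **kwargs) -> str:
        return self.bicall(*args, **kwargs)

    def bicall(self, input: str, id: str = None) -> Union[Parameter, str]:
        steps: List[Step] = [Step(f"answer to {input!r}")]  # pretend agent run
        if self.training:
            return Parameter(steps)       # train mode: keep the graph node
        return steps[-1].observation      # eval mode: return the final answer text


print(BicallComponent().call(input="who wrote Hamlet?", id="q1"))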
158 changes: 158 additions & 0 deletions benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
@@ -0,0 +1,158 @@
from typing import Any, Callable, Dict, Tuple

import adalflow as adal
from adalflow.eval.answer_match_acc import AnswerMatchAcc
from adalflow.datasets.types import HotPotQAData

from benchmarks.hotpot_qa._adal_train import load_datasets
from benchmarks.hotpot_qa.adal_exp.build_multi_hop_rag import AgenticRAG
from use_cases.config import gpt_3_model, gpt_4o_model


# TODO: look more into the loss function
# TODO: test LLM judge too.
class AgenticRAGAdal(adal.AdalComponent):
def __init__(
self,
model_client: adal.ModelClient,
model_kwargs: Dict,
backward_engine_model_config: Dict | None = None,
teacher_model_config: Dict | None = None,
text_optimizer_model_config: Dict | None = None,
):
task = AgenticRAG(
model_client=model_client,
model_kwargs=model_kwargs,
)
eval_fn = AnswerMatchAcc(type="fuzzy_match").compute_single_item
loss_fn = adal.EvalFnToTextLoss(
eval_fn=eval_fn, eval_fn_desc="fuzzy_match: 1 if str(y) in str(y_gt) else 0"
)
super().__init__(
task=task,
eval_fn=eval_fn,
loss_fn=loss_fn,
backward_engine_model_config=backward_engine_model_config,
teacher_model_config=teacher_model_config,
text_optimizer_model_config=text_optimizer_model_config,
)

# tell the trainer how to call the task
def prepare_task(self, sample: HotPotQAData) -> Tuple[Callable[..., Any], Dict]:
if self.task.training:
return self.task.forward, {"input": sample.question, "id": sample.id}
else:
return self.task.call, {"input": sample.question, "id": sample.id}

# TODO: use two map fns to make the code even simpler

# eval mode: get the generator output, directly engage with the eval_fn
def prepare_eval(self, sample: HotPotQAData, y_pred: Any) -> float:
# y_label = ""
# if y_pred and y_pred.data and y_pred.data.answer:
# y_label = y_pred.data.answer
return self.eval_fn, {"y": y_pred, "y_gt": sample.answer}

# train mode: get the loss and get the data from the full_response
def prepare_loss(self, sample: HotPotQAData, pred: adal.Parameter):
# prepare gt parameter
y_gt = adal.Parameter(
name="y_gt",
data=sample.answer,
eval_input=sample.answer,
requires_opt=False,
)

# pred's full_response is the output of the task pipeline which is GeneratorOutput
print(type(pred.data))
pred.eval_input = (
pred.data[-1].observation if pred.data and pred.data[-1] else ""
)
return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}}


# Note: diagnose is quite helpful; it lets you quickly check whether the eval function is the right metric.
# I checked the fuzzy-match eval and found that some "yes"/"Yes" pairs were not matched; after converting
# both strings to lowercase, performance went up from 0.15 to 0.4.
def train_diagnose(
model_client: adal.ModelClient,
model_kwargs: Dict,
) -> Dict:

trainset, valset, testset = load_datasets()

adal_component = AgenticRAGAdal(
model_client,
model_kwargs,
backward_engine_model_config=gpt_4o_model,
teacher_model_config=gpt_3_model,
text_optimizer_model_config=gpt_3_model,
)
trainer = adal.Trainer(adaltask=adal_component)
trainer.diagnose(dataset=trainset, split="train")
# trainer.diagnose(dataset=valset, split="val")
# trainer.diagnose(dataset=testset, split="test")


def train(
train_batch_size=4, # a larger batch size is not much more effective, probably because of the LLM's lost-in-the-middle problem
raw_shots: int = 0,
bootstrap_shots: int = 4,
max_steps=1,
num_workers=4,
strategy="constrained",
optimization_order="sequential",
debug=False,
resume_from_ckpt=None,
exclude_input_fields_from_bootstrap_demos=True,
):
adal_component = AgenticRAGAdal(
**gpt_3_model,
teacher_model_config=gpt_3_model,
text_optimizer_model_config=gpt_4o_model, # gpt-3.5 is not strong enough to be a good optimizer; it struggles with long context
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
trainer = adal.Trainer(
train_batch_size=train_batch_size,
adaltask=adal_component,
strategy=strategy,
max_steps=max_steps,
num_workers=num_workers,
raw_shots=raw_shots,
bootstrap_shots=bootstrap_shots,
debug=debug,
weighted_sampling=True,
optimization_order=optimization_order,
exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,
sequential_order=["text", "demo"],
)
print(trainer)

train_dataset, val_dataset, test_dataset = load_datasets()
trainer.fit(
train_dataset=train_dataset,
val_dataset=val_dataset,
test_dataset=test_dataset,
resume_from_ckpt=resume_from_ckpt,
)


if __name__ == "__main__":
from use_cases.config import gpt_3_model

log = adal.get_logger(level="DEBUG", enable_console=False)

adal.setup_env()

# task = MultiHopRAGAdal(**gpt_3_model)
# print(task)

# train_diagnose(**gpt_3_model)

# train: 0.15 before the evaluator converted to lower and 0.4 after the conversion
train(
debug=True,
max_steps=12,
# resume_from_ckpt="/Users/liyin/.adalflow/ckpt/ValinaRAGAdal/random_max_steps_12_7c091_run_1.json",
)
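For orientation, the three prepare_* hooks defined above are what the trainer invokes per sample. A simplified, hypothetical evaluation loop (not adal.Trainer's actual code) showing how prepare_task and prepare_eval fit together:

def eval_loop(adal_component, dataset):
    # Simplified view of a trainer's evaluation pass over the prepare_* hooks.
    total = 0.0
    for sample in dataset:
        task_fn, task_kwargs = adal_component.prepare_task(sample)
        y_pred = task_fn(**task_kwargs)                 # run the task pipeline
        eval_fn, eval_kwargs = adal_component.prepare_eval(sample, y_pred)
        total += eval_fn(**eval_kwargs)                 # score the prediction
    return total / max(len(dataset), 1)

In train mode, prepare_loss plays the analogous role: it wraps the prediction and the ground truth as Parameters so EvalFnToTextLoss can score them inside the graph.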
8 changes: 8 additions & 0 deletions tutorials/react_note.py
@@ -68,6 +68,14 @@ def test_react_agent(model_client: ModelClient, model_kwargs: dict):
print("")


"""
To have an agent.
input, prompt, template, step_history -> generator
-> stepoutput -> step_history -> generator -> stepoutput -> step_history
-> generator -> stepoutput -> step_history -> generator -> stepoutput -> step_history
"""


def test_react_agent_train(model_client: ModelClient, model_kwargs: dict):
tools = [multiply, add, divide]
queries = [
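The docstring added above sketches the ReAct cycle: the step history feeds the generator, whose step output is appended back onto the history before the next step. A toy rendering of that control flow, with a plain string standing in for the LLM-backed generator:

def react_loop(question: str, max_steps: int = 3) -> list:
    step_history = []
    for step in range(1, max_steps + 1):
        # generator: plan the next action from the input plus the history so far
        step_output = f"step {step}: act on {question!r} given {len(step_history)} prior steps"
        step_history.append(step_output)  # the history grows each iteration
        if step == max_steps:             # stand-in for the agent's 'finish' action
            break
    return step_history


for s in react_loop("What is 15 * 7 + 3?"):
    print(s)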
