Skip to content

Commit

Permalink
Merge pull request #171 from SylphAI-Inc/main
Browse files Browse the repository at this point in the history
[Release V0.2.0.beta.1]
  • Loading branch information
liyin2015 authored Aug 15, 2024
2 parents e2f0116 + ef07607 commit 93a6de0
Show file tree
Hide file tree
Showing 22 changed files with 587 additions and 301 deletions.
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,14 @@

<h1>
<p align="center">
AdalFlow: The Library for LLM Applications
AdalFlow: The Library to Build and to Auto-optimize LLM Applications
</p>
</h1>

AdalFlow helps developers build and optimize LLM task pipelines.
Embracing a design pattern similar to PyTorch's, AdalFlow is light, modular, and robust, with a 100% readable codebase.


# AdalFlow: A Tribute to Ada Lovelace

AdalFlow is named in honor of [Ada Lovelace](https://en.wikipedia.org/wiki/Ada_Lovelace), the pioneering female mathematician who first recognized that machines could do more than just calculations. As a female-led team, we aim to inspire more women to enter the AI field.

# Why AdalFlow?

LLMs are like water; they can be shaped into anything, from GenAI applications such as chatbots, translation, summarization, code generation, and autonomous agents to classical NLP tasks like text classification and named entity recognition. They interact with the world beyond the model’s internal knowledge via retrievers, memory, and tools (function calls). Each use case is unique in its data, business logic, and user experience.
Expand Down Expand Up @@ -304,7 +300,9 @@ AdalFlow full documentation available at [lightrag.sylph.ai](https://lightrag.sy
- [API reference](https://lightrag.sylph.ai/apis/index.html)


# AdalFlow: A Tribute to Ada Lovelace

AdalFlow is named in honor of [Ada Lovelace](https://en.wikipedia.org/wiki/Ada_Lovelace), the pioneering female mathematician who first recognized that machines could do more than just calculations. As a female-led team, we aim to inspire more women to enter the AI field.

# Contributors

Expand Down
4 changes: 4 additions & 0 deletions adalflow/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## [0.2.0.beta.2] - 2024-08-15
### Improved
- ``Demo Optimizer`` with "learn-to-reason" one-shot achieves 94% on the object count, close to the 98% achieved by the teacher gpt-4o model.

## [0.2.0.beta.1] - 2024-08-14
### Added
- Optimizer: `Parameter`, `GradComponent`, `Optimizer`, `AdalComponent`, and `Trainer`.
Expand Down
2 changes: 1 addition & 1 deletion adalflow/adalflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.1.0-beta.6"
__version__ = "0.2.0-beta.2"

from adalflow.core.component import Component, fun_to_component

Expand Down
15 changes: 13 additions & 2 deletions adalflow/adalflow/core/base_data_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,18 @@ def to_yaml(
self.to_dict(exclude=exclude, include=include),
default_flow_style=False,
sort_keys=False,
)
).strip()

def dict_to_yaml(self, data: Dict[str, Any]) -> str:
    """Serialize a plain dictionary into a YAML-formatted string.

    The mapping is dumped in block style (``default_flow_style=False``)
    without sorting its keys (``sort_keys=False``), and leading/trailing
    whitespace is stripped from the result.

    Args:
        data (Dict[str, Any]): The mapping to serialize.

    Returns:
        str: The YAML text for ``data`` with surrounding whitespace removed.
    """
    dump_options = {"default_flow_style": False, "sort_keys": False}
    yaml_text = yaml.dump(data, **dump_options)
    return yaml_text.strip()

@classmethod
def to_schema(
Expand Down Expand Up @@ -553,7 +564,7 @@ def to_schema_str(
) -> str:
"""Generate a Json schema which is more detailed than the signature."""
schema = cls.to_schema(exclude=exclude, include=include)
return json.dumps(schema, indent=4)
return json.dumps(schema, indent=4).strip()

@classmethod
def to_yaml_signature(
Expand Down
34 changes: 22 additions & 12 deletions adalflow/adalflow/core/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import json

from typing import Any, Dict, Optional, Union, Callable
from typing import Any, Dict, Optional, Union, Callable, Tuple, List
from copy import deepcopy
import logging

Expand Down Expand Up @@ -168,7 +168,9 @@ def __init__(
self._teacher: Optional["Generator"] = None

@staticmethod
def _get_default_mapping(output: "GeneratorOutput" = None) -> Dict[str, Callable]:
def _get_default_mapping(
output: "GeneratorOutput" = None,
) -> Tuple[Dict[str, Callable], List[str]]:

if (
output.data
Expand All @@ -180,15 +182,20 @@ def _get_default_mapping(output: "GeneratorOutput" = None) -> Dict[str, Callable
output_mapping = {
f: lambda x, f=f: getattr(x.data, f) for f in output_fields
}
elif output.data:
output_fields = ["raw_response", "data"]
elif output.raw_response:
output_fields = ["raw_response"]
output_mapping = {f: lambda x, f=f: getattr(x, f) for f in output_fields}
elif output.error:
output_mapping = {
"error": lambda x: x.error,
"raw_response": lambda x: x.raw_response,
}
return output_mapping
output_fields = ["Answer"]
output_mapping["Example"] = output_mapping["raw_response"]
del output_mapping["raw_response"]
# elif output.error:
# output_fields = ["raw_response", "error"]
# output_mapping = {
# "error": lambda x: x.error,
# "raw_response": lambda x: x.raw_response,
# }
# output_fields = ["Answer"]
return output_mapping, output_fields

def set_mock_output(
self, mock_output: bool = True, mock_output_data: str = "mock data"
Expand Down Expand Up @@ -346,7 +353,9 @@ def create_demo_data_instance(

# map the input fields
demo_data = {"id": id}
demo_data_class_output_mapping = self._get_default_mapping(output)
demo_data_class_output_mapping, output_fields = self._get_default_mapping(
output
)

for k, v in input_prompt_kwargs.items():
demo_data[k] = v
Expand All @@ -355,8 +364,9 @@ def create_demo_data_instance(
demo_data[key] = value(output)

obj = DynamicDataClassFactory.from_dict(demo_data)
obj.set_input_fields([k for k in input_prompt_kwargs.keys()])
obj.set_output_fields(output_fields)
if obj is None:
print(f"Error creating the demo data instance:{demo_data}")
raise ValueError(f"Error creating the demo data instance:{demo_data}")
return obj

Expand Down
52 changes: 43 additions & 9 deletions adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@ class BootstrapFewShot(DemoOptimizer):
__doc__ = r"""BootstrapFewShot performs few-shot sampling used in few-shot ICL.
It will be used to optimize parameters of demos.
Based on research from AdalFlow team and DsPy library.
Compared with Dspy's version, we added weighted sampling for both the raw and augmented demos
to prioritize failed demos but successful in augmented demos based on the evaluation score
while we backpropagate the demo samples.
Compared with Dspy's version:
1. we added weighted sampling for both the raw and augmented demos
to prioritize failed demos but successful in augmented demos based on the evaluation score
while we backpropagate the demo samples.
2. By default, we exclude the input fields from the augmented demos. Our research finds that
using the reasoning demonstrations from the teacher model can be more effective than just taking input and output
samples, and is more token-efficient.
Reference:
- DsPy: Compiling declarative language model calls into state-of-the-art pipelines.
"""
exclude_input_fields_from_bootstrap_demos: bool = True

def __init__(
self,
Expand All @@ -38,6 +44,7 @@ def __init__(
bootstrap_shots: Optional[int] = None,
dataset: Optional[List[DataClass]] = None,
weighted: bool = True,
exclude_input_fields_from_bootstrap_demos: bool = True,
):
super().__init__(weighted=weighted, dataset=dataset)
self.params = [
Expand All @@ -53,6 +60,9 @@ def __init__(
self.proposing = False
self._teacher_scores: Dict[str, float] = {} # data id to score
self._student_scores: Dict[str, float] = {} # data id to score
self.exclude_input_fields_from_bootstrap_demos = (
exclude_input_fields_from_bootstrap_demos
)

def add_scores(self, ids: List[str], scores: List[float], is_teacher: bool = True):
if len(ids) != len(scores):
Expand Down Expand Up @@ -160,11 +170,23 @@ def sample(
return sampled_augmented_demos, sampled_raw_demos

@staticmethod
def samples_to_str(samples: List[DataClass]) -> str:
def samples_to_str(
samples: List[DataClass], augmented: bool = False, exclude_inputs: bool = False
) -> str:
sample_strs = []
for sample in samples:
try:
sample_strs.append(sample.to_yaml(exclude=["id", "score"]))

# process the input fields
if augmented:
exclude_fields = ["id", "score"]
if exclude_inputs:
exclude_fields.extend(sample.get_input_fields())
yaml_str = sample.to_yaml(exclude=exclude_fields)

else:
yaml_str = sample.to_yaml(exclude=["id", "score"])
sample_strs.append(yaml_str + "\n")
except Exception as e:
print(f"Error: {e} to yaml for {sample}")
sample_strs.append(str(sample))
Expand All @@ -188,12 +210,24 @@ def propose(self):
bootstrap_shots=self._bootstrap_shots,
weighted=self._weighted,
)
print(
f"sampled_augmented_demos: {[demo.id for demo in sampled_augmented_demos]}"
)
samples = sampled_augmented_demos + sampled_raw_demos
demo_str = ""
if len(samples) > 0:

demo_str = self.samples_to_str(samples=samples)

demo_str = ""
if len(sampled_augmented_demos) > 0:

demo_str = self.samples_to_str(
samples=sampled_augmented_demos,
augmented=True,
exclude_inputs=self.exclude_input_fields_from_bootstrap_demos,
)
if len(sampled_raw_demos) > 0:
demo_str += "\n" + self.samples_to_str(
samples=sampled_raw_demos, augmented=False
)
demo_str = demo_str.strip()
demo_param.propose_data(demo_str, samples)
except Exception as e:
print(f"Error: {e} for {demo_param.name}")
Expand Down
3 changes: 3 additions & 0 deletions adalflow/adalflow/optim/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ class Parameter(Generic[T]):
from_response_id: str = (
None # for parameterType GRADIENT, the id of the response parameter
)
backward_engine_disabled: bool = (
False # Disable the backward engine for the parameter
)

def __init__(
self,
Expand Down
3 changes: 2 additions & 1 deletion adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def backward(
log.info(f"response_gradient_context: {response_gradient_context}")

# go through all child parameters
if backward_engine:
if backward_engine and not response.backward_engine_disabled:
# Convert all input arguments to string
inputs_string = "\n\n".join(
[
Expand All @@ -357,6 +357,7 @@ def backward(
f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization."
)
continue

self._backward_through_one_predecessor(
pred,
inputs_string,
Expand Down
10 changes: 6 additions & 4 deletions adalflow/adalflow/optim/trainer/adal.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def train_step(self, batch, batch_idx, num_workers: int = 2) -> List:
tqdm_loader = tqdm(
total=len(futures),
position=0,
desc="Evaluating",
desc=f"Evaluating step: {batch_idx}",
)
for future, i, sample in futures:
y_pred = future.result()
Expand Down Expand Up @@ -287,10 +287,10 @@ def train_step(self, batch, batch_idx, num_workers: int = 2) -> List:
break

tqdm_loader.set_description(
f"Evaluating: {round(eval_score,4)} across {len(completed_indices)} samples, Max potential: {round(max_score,4)}"
f"Evaluating step({batch_idx}): {round(eval_score,4)} across {len(completed_indices)} samples, Max potential: {round(max_score,4)}"
)
else:
tqdm_loader.set_description("Evaluating")
tqdm_loader.set_description(f"Evaluating step({batch_idx})")

tqdm_loader.update(1) # Update the progress bar

Expand Down Expand Up @@ -500,9 +500,11 @@ def _on_completion_callback(
)

# Register the callback for each generator
call_logger = GeneratorCallLogger(save_dir=save_dir)

file_paths = []
for name, generator in all_generators:
call_logger = GeneratorCallLogger(save_dir=save_dir)
call_logger.reset()
call_logger.register_generator(name)
logger_call = partial(call_logger.log_call, name)
generator.register_callback(
Expand Down
Loading

0 comments on commit 93a6de0

Please sign in to comment.