[Issue 273] Apply consistent coding and formatting #274

Merged: 9 commits, Nov 22, 2024
Changes from 8 commits
13 changes: 8 additions & 5 deletions .pre-commit-config.yaml
@@ -14,13 +14,16 @@ repos:
hooks:
- id: black
args: ['--line-length=88']
exclude: ^docs/|.*\.(json|yaml|md|txt)$

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.2
# Add local hooks to run custom commands
- repo: local
hooks:
# Run the linter.
- id: ruff
args: ['--fix', '--extend-ignore=E402']
- id: run-make-format
name: Run Make Format
entry: make format
language: system
pass_filenames: false
# - repo: https://github.com/pycqa/flake8
# rev: 4.0.1
# hooks:
51 changes: 51 additions & 0 deletions Makefile
@@ -0,0 +1,51 @@
# Define variables for common directories and commands
PYTHON = poetry run
SRC_DIR = .

# Default target: Show help
.PHONY: help
help:
@echo "Available targets:"
@echo " setup Install dependencies and set up pre-commit hooks"
@echo " format Run Black and Ruff to format the code"
@echo " lint Run Ruff to check code quality"
@echo " test Run tests with pytest"
@echo " precommit Run pre-commit hooks on all files"
@echo " clean Clean up temporary files and build artifacts"

# Install dependencies and set up pre-commit hooks
.PHONY: setup
setup:
poetry install
poetry run pre-commit install

# Format code using Black and Ruff
.PHONY: format
format:
$(PYTHON) black $(SRC_DIR)
git ls-files | xargs pre-commit run black --files

# Run lint checks using Ruff
.PHONY: lint
lint:
$(PYTHON) ruff check $(SRC_DIR)

# Run all pre-commit hooks on all files
.PHONY: precommit
precommit:
$(PYTHON) pre-commit run --all-files

# Run tests
.PHONY: test
test:
$(PYTHON) pytest

# Clean up temporary files and build artifacts
.PHONY: clean
clean:
rm -rf .pytest_cache
rm -rf .mypy_cache
rm -rf __pycache__
rm -rf build dist *.egg-info
find . -type d -name "__pycache__" -exec rm -r {} +
find . -type f -name "*.pyc" -delete
4 changes: 2 additions & 2 deletions adalflow/PACKAGING.md
@@ -33,10 +33,10 @@ pip install "dist/adalflow-0.1.0b1-py3-none-any.whl[openai,groq,faiss]"

1. Update the version in `pyproject.toml`
2. Add the version number in `adalflow/__init__.py`
3. Build the package
3. Build the package
4. Test the package locally
5. Push the changes to the repository
6. Ensure to run `poetry lock --no-update` in the root directory (project-level) to update the lock file for other directories such as `tutorials`, `use_cases`, `benchmarks`, etc.
7. Update the `CHANGELOG.md` file with the new version number and the changes made in the new version.

## TODO: we need to automate the version update process. Help is appreciated.
## TODO: we need to automate the version update process. Help is appreciated.
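The TODO above invites automation of the version-update step. Below is a minimal sketch of one way to do it, offered as a reviewer note rather than part of this PR. It assumes the version lives under `[tool.poetry]` in `adalflow/pyproject.toml` and that `adalflow/adalflow/__init__.py` carries a `__version__ = "..."` assignment; both are assumptions about the repository layout, not facts taken from this diff.

```python
# Hypothetical helper, not part of this PR: sync __version__ in the package's
# __init__.py with the version declared in pyproject.toml.
# Assumes Python 3.11+ for the standard-library tomllib module; the file paths
# below are assumptions about the repository layout.
import re
import tomllib
from pathlib import Path


def sync_version(
    pyproject: str = "adalflow/pyproject.toml",
    init_file: str = "adalflow/adalflow/__init__.py",
) -> str:
    """Read the poetry version and rewrite __version__ in __init__.py."""
    with open(pyproject, "rb") as f:
        version = tomllib.load(f)["tool"]["poetry"]["version"]
    init_path = Path(init_file)
    text = init_path.read_text()
    # Replace the existing __version__ assignment in place.
    text = re.sub(r'__version__\s*=\s*".*"', f'__version__ = "{version}"', text)
    init_path.write_text(text)
    return version


if __name__ == "__main__":
    print(f"Synced package version to {sync_version()}")
```

Hooking a script like this into a dedicated Make target would fold step 2 of the checklist into step 1.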
5 changes: 3 additions & 2 deletions adalflow/adalflow/components/model_client/anthropic_client.py
@@ -15,7 +15,8 @@
anthropic = safe_import(
OptionalPackages.ANTHROPIC.value[0], OptionalPackages.ANTHROPIC.value[1]
)
import anthropic

# import anthropic
from anthropic import (
RateLimitError,
APITimeoutError,
@@ -167,4 +168,4 @@ async def acall(
elif model_type == ModelType.LLM:
return await self.async_client.messages.create(**api_kwargs)
else:
raise ValueError(f"model_type {model_type} is not supported")
raise ValueError(f"model_type {model_type} is not supported")
64 changes: 35 additions & 29 deletions adalflow/adalflow/components/model_client/bedrock_client.py
@@ -15,17 +15,21 @@

bedrock_runtime_exceptions = boto3.client(
service_name="bedrock-runtime",
region_name=os.getenv("AWS_REGION_NAME", "us-east-1")
region_name=os.getenv("AWS_REGION_NAME", "us-east-1"),
).exceptions


def get_first_message_content(completion: Dict) -> str:
r"""When we only need the content of the first message.
It is the default parser for chat completion."""
return completion['output']['message']['content'][0]['text']
return completion["output"]["message"]["content"][0]["text"]


__all__ = ["BedrockAPIClient", "get_first_message_content", "bedrock_runtime_exceptions"]
__all__ = [
"BedrockAPIClient",
"get_first_message_content",
"bedrock_runtime_exceptions",
]


class BedrockAPIClient(ModelClient):
@@ -34,15 +38,15 @@ class BedrockAPIClient(ModelClient):
"""

def __init__(
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
):
super().__init__()
self._aws_profile_name = aws_profile_name
@@ -56,7 +60,7 @@ def __init__(
self.session = None
self.sync_client = self.init_sync_client()
self.chat_completion_parser = (
chat_completion_parser or get_first_message_content
chat_completion_parser or get_first_message_content
)

def init_sync_client(self):
@@ -67,14 +71,16 @@ def init_sync_client(self):
aws_profile_name = self._aws_profile_name or os.getenv("AWS_PROFILE_NAME")
aws_region_name = self._aws_region_name or os.getenv("AWS_REGION_NAME")
aws_access_key_id = self._aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = self._aws_secret_access_key or os.getenv("AWS_SECRET_ACCESS_KEY")
aws_secret_access_key = self._aws_secret_access_key or os.getenv(
"AWS_SECRET_ACCESS_KEY"
)
aws_session_token = self._aws_session_token or os.getenv("AWS_SESSION_TOKEN")

config = None
if self._aws_connection_timeout or self._aws_read_timeout:
config = Config(
connect_timeout=self._aws_connection_timeout, # Connection timeout in seconds
read_timeout=self._aws_read_timeout # Read timeout in seconds
read_timeout=self._aws_read_timeout, # Read timeout in seconds
)

session = boto3.Session(
@@ -93,7 +99,7 @@ def init_async_client(self):
def parse_chat_completion(self, completion):
log.debug(f"completion: {completion}")
try:
data = completion['output']['message']['content'][0]['text']
data = completion["output"]["message"]["content"][0]["text"]
usage = self.track_completion_usage(completion)
return GeneratorOutput(data=None, usage=usage, raw_response=data)
except Exception as e:
@@ -104,18 +110,18 @@ def parse_chat_completion(self, completion):

def track_completion_usage(self, completion: Dict) -> CompletionUsage:
r"""Track the completion usage."""
usage = completion['usage']
usage = completion["usage"]
return CompletionUsage(
completion_tokens=usage['outputTokens'],
prompt_tokens=usage['inputTokens'],
total_tokens=usage['totalTokens']
completion_tokens=usage["outputTokens"],
prompt_tokens=usage["inputTokens"],
total_tokens=usage["totalTokens"],
)

def convert_inputs_to_api_kwargs(
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED,
):
"""
check the converse api doc here:
@@ -133,11 +139,11 @@ def convert_inputs_to_api_kwargs(
@backoff.on_exception(
backoff.expo,
(
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException,
),
max_time=5,
)
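Since every hunk in this file is formatting-only, a short usage sketch may help reviewers confirm nothing behavioral changed. The constructor keywords and the `convert_inputs_to_api_kwargs` signature are taken from the diff above; the import paths, the `ModelType` location, and the `modelId` value are assumptions made for illustration, not part of this PR.

```python
# Hypothetical usage sketch, not part of this PR; import paths are assumed.
from adalflow.components.model_client.bedrock_client import BedrockAPIClient
from adalflow.core.types import ModelType  # assumed location of ModelType

# Credentials left as None fall back to the AWS_* environment variables,
# as init_sync_client() above shows.
client = BedrockAPIClient(aws_region_name="us-east-1")

api_kwargs = client.convert_inputs_to_api_kwargs(
    input="What is the capital of France?",
    model_kwargs={"modelId": "anthropic.claude-3-haiku-20240307-v1:0"},  # illustrative model id
    model_type=ModelType.LLM,
)
```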
2 changes: 1 addition & 1 deletion adalflow/adalflow/optim/optimizer.py
@@ -67,7 +67,7 @@ def __init__(
dataset: Sequence[DataClass] = None,
exclude_input_fields_from_bootstrap_demos: bool = False,
*args,
**kwargs
**kwargs,
):
self._weighted = weighted
self.dataset = dataset
2 changes: 1 addition & 1 deletion adalflow/adalflow/utils/lazy_import.py
@@ -78,7 +78,7 @@ class LazyImport:
"""

def __init__(
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
):
if args or kwargs:
raise TypeError(
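The only change here is whitespace inside the constructor signature. For context, a sketch of how that constructor might be called follows; the module path and enum member are assumptions that mirror the `safe_import(OptionalPackages.ANTHROPIC...)` call visible in the anthropic_client hunk above, not anything introduced by this PR.

```python
# Hypothetical illustration, not part of this PR: building a LazyImport for an
# optional dependency. The import path string and enum member are assumptions.
from adalflow.utils.lazy_import import LazyImport, OptionalPackages

# Extra positional or keyword arguments would trigger the TypeError guard
# shown in the constructor above.
lazy_anthropic = LazyImport("anthropic.Anthropic", OptionalPackages.ANTHROPIC)
```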
5 changes: 2 additions & 3 deletions adalflow/tests/test_random_sample.py
@@ -1,13 +1,12 @@
import unittest
from typing import TypeVar
from adalflow.core.functional import random_sample


# Assuming the random_sample function is defined here or imported
T_co = TypeVar("T_co", covariant=True)


from adalflow.core.functional import random_sample


class TestRandomSample(unittest.TestCase):

def setUp(self):
4 changes: 2 additions & 2 deletions benchmarks/README.md
@@ -1,3 +1,3 @@
Benchmarking is an integral development part of the project.
Benchmarking is an integral development part of the project.

Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
@@ -499,4 +499,4 @@
"answer": "grand assembly",
"type": "bridge"
}
]
]
2 changes: 1 addition & 1 deletion benchmarks/ReAct_agent/paper_data/paper_dev_10.json
@@ -429,4 +429,4 @@
]
]
}
]
]
54 changes: 34 additions & 20 deletions benchmarks/ReAct_agent/utils/tools.py
@@ -9,15 +9,17 @@
Apply the similar code for wikipedia search from the Paper (open-source).
"""


# copy code from the paper
def clean_str(p):
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")


# normalization copied from the paper's code
def normalize_answer(s):
def remove_articles(text):
return re.sub(r"\b(a|an|the)\b", " ", text)

def white_space_fix(text):
return " ".join(text.split())

Expand All @@ -39,58 +41,70 @@ def search(entity: str) -> str:
# Format the entity for URL encoding
entity_formatted = entity.replace(" ", "+")
url = f"https://en.wikipedia.org/w/index.php?search={entity_formatted}"

# Fetch the page
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
soup = BeautifulSoup(response.text, "html.parser")

# Check if the exact page was found or suggest similar items
# when <div class=mw-search-result-heading> is detected, it means the entity page is not found on wikipedia
result_divs = soup.find_all("div", {"class": "mw-search-result-heading"})

if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities

if (
result_divs
): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities
# get Similar results
similar_titles = [div.a.get_text() for div in result_divs]
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
else:
# the paper uses page to represent content in <p>
# Extract xontent
page_list = [p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")]
page_list = [
p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")
]
# TODO: Recursive search, if find any concept that needs more search then call search again
# if any("may refer to:" in p for p in page_list):
# search(entity)

# restructure & clean the page content following the paper's logic
page = ''
page = ""
for p in page_list:
if len(p.split(" ")) > 2:
page += clean_str(p)
if not p.endswith("\n"):
page += "\n"
paragraphs = page.split("\n")
paragraphs = [p.strip() for p in paragraphs if p.strip()]

sentences = []
for p in paragraphs:
sentences += p.split('. ')
sentences = [s.strip() + '.' for s in sentences if s.strip()]
sentences += p.split(". ")
sentences = [s.strip() + "." for s in sentences if s.strip()]

# return the first 5 sentences
if sentences:
return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)
return (
" ".join(sentences[:5]) if len(sentences) >= 5 else " ".join(sentences)
)
else:
return "No content found on this page."

# TODO: clean the paragraphs and return the searched content


def lookup(text: str, keyword: str) -> str:
"""
returns the sentences containing keyword in the current passage.
returns the sentences containing keyword in the current passage.
"""
sentences = text.split('.')
matching_sentences = [sentence.strip() + '.' for sentence in sentences if keyword.lower() in sentence.lower()]
sentences = text.split(".")
matching_sentences = [
sentence.strip() + "."
for sentence in sentences
if keyword.lower() in sentence.lower()
]
if not matching_sentences:
return "No sentences found with the keyword."
else:
return ' '.join(matching_sentences) # Join all matching sentences into a single string
return " ".join(
matching_sentences
) # Join all matching sentences into a single string
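As a quick sanity check that this file's reformatting is behavior-preserving, here is a minimal usage sketch of the two helpers it defines. The import path assumes `benchmarks/` is importable from the repository root, and the entity and keyword are arbitrary examples; running it makes a live request to Wikipedia.

```python
# Hypothetical usage sketch, not part of this PR. Import path is an assumption.
from benchmarks.ReAct_agent.utils.tools import search, lookup

passage = search("Grand National Assembly of Turkey")  # live Wikipedia lookup
print(passage)                                          # first few sentences of the page
print(lookup(passage, "assembly"))                      # sentences containing the keyword
```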