[Issue 273] Apply consistent coding and formatting #274

Merged: 9 commits, Nov 22, 2024
Changes from 8 commits
13 changes: 8 additions & 5 deletions .pre-commit-config.yaml
@@ -14,13 +14,16 @@ repos:
hooks:
- id: black
args: ['--line-length=88']
exclude: ^docs/|.*\.(json|yaml|md|txt)$

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.2
# Add local hooks to run custom commands
- repo: local
hooks:
# Run the linter.
- id: ruff
args: ['--fix', '--extend-ignore=E402']
- id: run-make-format
name: Run Make Format
entry: make format
language: system
pass_filenames: false
# - repo: https://github.com/pycqa/flake8
# rev: 4.0.1
# hooks:
51 changes: 51 additions & 0 deletions Makefile
@@ -0,0 +1,51 @@
# Define variables for common directories and commands
PYTHON = poetry run
SRC_DIR = .

# Default target: Show help
.PHONY: help
help:
@echo "Available targets:"
@echo " setup Install dependencies and set up pre-commit hooks"
@echo " format Run Black and Ruff to format the code"
@echo " lint Run Ruff to check code quality"
@echo " test Run tests with pytest"
@echo " precommit Run pre-commit hooks on all files"
@echo " clean Clean up temporary files and build artifacts"

# Install dependencies and set up pre-commit hooks
.PHONY: setup
setup:
poetry install
poetry run pre-commit install

# Format code using Black and Ruff
.PHONY: format
format:
$(PYTHON) black $(SRC_DIR)
git ls-files | xargs pre-commit run black --files

# Run lint checks using Ruff
.PHONY: lint
lint:
$(PYTHON) ruff check $(SRC_DIR)

# Run all pre-commit hooks on all files
.PHONY: precommit
precommit:
$(PYTHON) pre-commit run --all-files

# Run tests
.PHONY: test
test:
$(PYTHON) pytest

# Clean up temporary files and build artifacts
.PHONY: clean
clean:
rm -rf .pytest_cache
rm -rf .mypy_cache
rm -rf __pycache__
rm -rf build dist *.egg-info
find . -type d -name "__pycache__" -exec rm -r {} +
find . -type f -name "*.pyc" -delete
4 changes: 2 additions & 2 deletions adalflow/PACKAGING.md
@@ -33,10 +33,10 @@ pip install "dist/adalflow-0.1.0b1-py3-none-any.whl[openai,groq,faiss]"

1. Update the version in `pyproject.toml`
2. Add the version number in `adalflow/__init__.py`
3. Build the package
3. Build the package
4. Test the package locally
5. Push the changes to the repository
6. Ensure to run `poetry lock --no-update` in the root directory (project-level) to update the lock file for other directories such as `tutorials`, `use_cases`, `benchmarks`, etc.
7. Update the `CHANGELOG.md` file with the new version number and the changes made in the new version.

## TODO: we need to automate the version update process. Help is appreciated.
## TODO: we need to automate the version update process. Help is appreciated.
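The TODO above invites automation of the version-update step. Below is a minimal sketch of one way to do it, offered as a reviewer note rather than part of this PR. It assumes the version lives under `[tool.poetry]` in `adalflow/pyproject.toml` and that `adalflow/adalflow/__init__.py` carries a `__version__ = "..."` assignment; both are assumptions about the repository layout, not facts taken from this diff.

```python
# Hypothetical helper, not part of this PR: sync __version__ in the package's
# __init__.py with the version declared in pyproject.toml.
# Assumes Python 3.11+ for the standard-library tomllib module; the file paths
# below are assumptions about the repository layout.
import re
import tomllib
from pathlib import Path


def sync_version(
    pyproject: str = "adalflow/pyproject.toml",
    init_file: str = "adalflow/adalflow/__init__.py",
) -> str:
    """Read the poetry version and rewrite __version__ in __init__.py."""
    with open(pyproject, "rb") as f:
        version = tomllib.load(f)["tool"]["poetry"]["version"]
    init_path = Path(init_file)
    text = init_path.read_text()
    # Replace the existing __version__ assignment in place.
    text = re.sub(r'__version__\s*=\s*".*"', f'__version__ = "{version}"', text)
    init_path.write_text(text)
    return version


if __name__ == "__main__":
    print(f"Synced package version to {sync_version()}")
```

Hooking a script like this into a dedicated Make target would fold step 2 of the checklist into step 1.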
5 changes: 3 additions & 2 deletions adalflow/adalflow/components/model_client/anthropic_client.py
@@ -15,7 +15,8 @@
anthropic = safe_import(
OptionalPackages.ANTHROPIC.value[0], OptionalPackages.ANTHROPIC.value[1]
)
import anthropic

# import anthropic
from anthropic import (
RateLimitError,
APITimeoutError,
@@ -167,4 +168,4 @@ async def acall(
elif model_type == ModelType.LLM:
return await self.async_client.messages.create(**api_kwargs)
else:
raise ValueError(f"model_type {model_type} is not supported")
raise ValueError(f"model_type {model_type} is not supported")
64 changes: 35 additions & 29 deletions adalflow/adalflow/components/model_client/bedrock_client.py
@@ -15,17 +15,21 @@

bedrock_runtime_exceptions = boto3.client(
service_name="bedrock-runtime",
region_name=os.getenv("AWS_REGION_NAME", "us-east-1")
region_name=os.getenv("AWS_REGION_NAME", "us-east-1"),
).exceptions


def get_first_message_content(completion: Dict) -> str:
r"""When we only need the content of the first message.
It is the default parser for chat completion."""
return completion['output']['message']['content'][0]['text']
return completion["output"]["message"]["content"][0]["text"]


__all__ = ["BedrockAPIClient", "get_first_message_content", "bedrock_runtime_exceptions"]
__all__ = [
"BedrockAPIClient",
"get_first_message_content",
"bedrock_runtime_exceptions",
]


class BedrockAPIClient(ModelClient):
@@ -34,15 +38,15 @@ class BedrockAPIClient(ModelClient):
"""

def __init__(
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
):
super().__init__()
self._aws_profile_name = aws_profile_name
@@ -56,7 +60,7 @@ def __init__(
self.session = None
self.sync_client = self.init_sync_client()
self.chat_completion_parser = (
chat_completion_parser or get_first_message_content
chat_completion_parser or get_first_message_content
)

def init_sync_client(self):
@@ -67,14 +71,16 @@ def init_sync_client(self):
aws_profile_name = self._aws_profile_name or os.getenv("AWS_PROFILE_NAME")
aws_region_name = self._aws_region_name or os.getenv("AWS_REGION_NAME")
aws_access_key_id = self._aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = self._aws_secret_access_key or os.getenv("AWS_SECRET_ACCESS_KEY")
aws_secret_access_key = self._aws_secret_access_key or os.getenv(
"AWS_SECRET_ACCESS_KEY"
)
aws_session_token = self._aws_session_token or os.getenv("AWS_SESSION_TOKEN")

config = None
if self._aws_connection_timeout or self._aws_read_timeout:
config = Config(
connect_timeout=self._aws_connection_timeout, # Connection timeout in seconds
read_timeout=self._aws_read_timeout # Read timeout in seconds
read_timeout=self._aws_read_timeout, # Read timeout in seconds
)

session = boto3.Session(
@@ -93,7 +99,7 @@ def init_async_client(self):
def parse_chat_completion(self, completion):
log.debug(f"completion: {completion}")
try:
data = completion['output']['message']['content'][0]['text']
data = completion["output"]["message"]["content"][0]["text"]
usage = self.track_completion_usage(completion)
return GeneratorOutput(data=None, usage=usage, raw_response=data)
except Exception as e:
@@ -104,18 +110,18 @@ def parse_chat_completion(self, completion):

def track_completion_usage(self, completion: Dict) -> CompletionUsage:
r"""Track the completion usage."""
usage = completion['usage']
usage = completion["usage"]
return CompletionUsage(
completion_tokens=usage['outputTokens'],
prompt_tokens=usage['inputTokens'],
total_tokens=usage['totalTokens']
completion_tokens=usage["outputTokens"],
prompt_tokens=usage["inputTokens"],
total_tokens=usage["totalTokens"],
)

def convert_inputs_to_api_kwargs(
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED,
):
"""
check the converse api doc here:
@@ -133,11 +139,11 @@ def convert_inputs_to_api_kwargs(
@backoff.on_exception(
backoff.expo,
(
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException,
),
max_time=5,
)
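Since every hunk in this file is formatting-only, a short usage sketch may help reviewers confirm nothing behavioral changed. The constructor keywords and the `convert_inputs_to_api_kwargs` signature are taken from the diff above; the import paths, the `ModelType` location, and the `modelId` value are assumptions made for illustration, not part of this PR.

```python
# Hypothetical usage sketch, not part of this PR; import paths are assumed.
from adalflow.components.model_client.bedrock_client import BedrockAPIClient
from adalflow.core.types import ModelType  # assumed location of ModelType

# Credentials left as None fall back to the AWS_* environment variables,
# as init_sync_client() above shows.
client = BedrockAPIClient(aws_region_name="us-east-1")

api_kwargs = client.convert_inputs_to_api_kwargs(
    input="What is the capital of France?",
    model_kwargs={"modelId": "anthropic.claude-3-haiku-20240307-v1:0"},  # illustrative model id
    model_type=ModelType.LLM,
)
```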
2 changes: 1 addition & 1 deletion adalflow/adalflow/optim/optimizer.py
@@ -67,7 +67,7 @@ def __init__(
dataset: Sequence[DataClass] = None,
exclude_input_fields_from_bootstrap_demos: bool = False,
*args,
**kwargs
**kwargs,
):
self._weighted = weighted
self.dataset = dataset
2 changes: 1 addition & 1 deletion adalflow/adalflow/utils/lazy_import.py
@@ -78,7 +78,7 @@ class LazyImport:
"""

def __init__(
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
):
if args or kwargs:
raise TypeError(
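The only change here is whitespace inside the constructor signature. For context, a sketch of how that constructor might be called follows; the module path and enum member are assumptions that mirror the `safe_import(OptionalPackages.ANTHROPIC...)` call visible in the anthropic_client hunk above, not anything introduced by this PR.

```python
# Hypothetical illustration, not part of this PR: building a LazyImport for an
# optional dependency. The import path string and enum member are assumptions.
from adalflow.utils.lazy_import import LazyImport, OptionalPackages

# Extra positional or keyword arguments would trigger the TypeError guard
# shown in the constructor above.
lazy_anthropic = LazyImport("anthropic.Anthropic", OptionalPackages.ANTHROPIC)
```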
5 changes: 2 additions & 3 deletions adalflow/tests/test_random_sample.py
@@ -1,13 +1,12 @@
import unittest
from typing import TypeVar
from adalflow.core.functional import random_sample


# Assuming the random_sample function is defined here or imported
T_co = TypeVar("T_co", covariant=True)


from adalflow.core.functional import random_sample


class TestRandomSample(unittest.TestCase):

def setUp(self):
4 changes: 2 additions & 2 deletions benchmarks/README.md
@@ -1,3 +1,3 @@
Benchmarking is an integral development part of the project.
Benchmarking is an integral development part of the project.

Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
@@ -499,4 +499,4 @@
"answer": "grand assembly",
"type": "bridge"
}
]
]
2 changes: 1 addition & 1 deletion benchmarks/ReAct_agent/paper_data/paper_dev_10.json
@@ -429,4 +429,4 @@
]
]
}
]
]
54 changes: 34 additions & 20 deletions benchmarks/ReAct_agent/utils/tools.py
@@ -9,15 +9,17 @@
Apply the similar code for wikipedia search from the Paper (open-source).
"""


# copy code from the paper
def clean_str(p):
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")


# normalization copied from the paper's code
def normalize_answer(s):
def remove_articles(text):
return re.sub(r"\b(a|an|the)\b", " ", text)

def white_space_fix(text):
return " ".join(text.split())

Expand All @@ -39,58 +41,70 @@ def search(entity: str) -> str:
# Format the entity for URL encoding
entity_formatted = entity.replace(" ", "+")
url = f"https://en.wikipedia.org/w/index.php?search={entity_formatted}"

# Fetch the page
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
soup = BeautifulSoup(response.text, "html.parser")

# Check if the exact page was found or suggest similar items
# when <div class=mw-search-result-heading> is detected, it means the entity page is not found on wikipedia
result_divs = soup.find_all("div", {"class": "mw-search-result-heading"})

if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities

if (
result_divs
): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities
# get Similar results
similar_titles = [div.a.get_text() for div in result_divs]
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
else:
# the paper uses page to represent content in <p>
# Extract xontent
page_list = [p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")]
page_list = [
p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")
]
# TODO: Recursive search, if find any concept that needs more search then call search again
# if any("may refer to:" in p for p in page_list):
# search(entity)

# restructure & clean the page content following the paper's logic
page = ''
page = ""
for p in page_list:
if len(p.split(" ")) > 2:
page += clean_str(p)
if not p.endswith("\n"):
page += "\n"
paragraphs = page.split("\n")
paragraphs = [p.strip() for p in paragraphs if p.strip()]

sentences = []
for p in paragraphs:
sentences += p.split('. ')
sentences = [s.strip() + '.' for s in sentences if s.strip()]
sentences += p.split(". ")
sentences = [s.strip() + "." for s in sentences if s.strip()]

# return the first 5 sentences
if sentences:
return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)
return (
" ".join(sentences[:5]) if len(sentences) >= 5 else " ".join(sentences)
)
else:
return "No content found on this page."

# TODO: clean the paragraphs and return the searched content


def lookup(text: str, keyword: str) -> str:
"""
returns the sentences containing keyword in the current passage.
returns the sentences containing keyword in the current passage.
"""
sentences = text.split('.')
matching_sentences = [sentence.strip() + '.' for sentence in sentences if keyword.lower() in sentence.lower()]
sentences = text.split(".")
matching_sentences = [
sentence.strip() + "."
for sentence in sentences
if keyword.lower() in sentence.lower()
]
if not matching_sentences:
return "No sentences found with the keyword."
else:
return ' '.join(matching_sentences) # Join all matching sentences into a single string
return " ".join(
matching_sentences
) # Join all matching sentences into a single string
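As a quick sanity check that this file's reformatting is behavior-preserving, here is a minimal usage sketch of the two helpers it defines. The import path assumes `benchmarks/` is importable from the repository root, and the entity and keyword are arbitrary examples; running it makes a live request to Wikipedia.

```python
# Hypothetical usage sketch, not part of this PR. Import path is an assumption.
from benchmarks.ReAct_agent.utils.tools import search, lookup

passage = search("Grand National Assembly of Turkey")  # live Wikipedia lookup
print(passage)                                          # first few sentences of the page
print(lookup(passage, "assembly"))                      # sentences containing the keyword
```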