SylphAI-Inc · Alleria1809 · Jul 3, 2024 · Jun 27, 2024 · Jul 2, 2024 · Jul 2, 2024
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -3,7 +3,7 @@ name: Documentation
 on:
   push:
     branches:
-      - xiaoyi_doc  # Ensure this is the branch where you commit documentation updates
+      - release  # Trigger the workflow when changes are pushed to the release branch
 
 permissions:
   contents: write
@@ -17,52 +17,53 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
         with:
-          fetch-depth: 0
+          fetch-depth: 0  # Fetch all history for all branches and tags
 
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
+          python-version: '3.11'  # Ensure the Python version is correct
 
       - name: Install Poetry
         run: |
           curl -sSL https://install.python-poetry.org | python3 -
-          echo "$HOME/.local/bin" >> $GITHUB_PATH
+          echo "$HOME/.local/bin" >> $GITHUB_PATH  # Ensure Poetry's bin directory is in PATH
 
       - name: Install dependencies using Poetry
         run: |
-          poetry config virtualenvs.create false
-          poetry install
+          poetry config virtualenvs.create false  # Avoid creating a virtual environment
+          poetry install  # Install dependencies as specified in pyproject.toml
 
       - name: Build documentation using Makefile
         run: |
           echo "Building documentation from: $(pwd)"
           ls -l  # Debug: List current directory contents
-          poetry run make -C docs html
+          poetry run make -C docs html  # Run Makefile in docs directory to build HTML docs
         working-directory: ${{ github.workspace }}
 
       - name: List built documentation
         run: |
-          find ./build/ -type f
+          find ./build/ -type f  # List all files in the build directory
         working-directory: ${{ github.workspace }}/docs
 
       - name: Create .nojekyll file
         run: |
-          touch .nojekyll
+          touch .nojekyll  # Prevent GitHub Pages from ignoring files that start with an underscore
         working-directory: ${{ github.workspace }}/docs/build
 
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@v3
         with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_branch: gh-pages
-          publish_dir: ./docs/build/
-          user_name: github-actions[bot]
-          user_email: github-actions[bot]@users.noreply.github.com
+          github_token: ${{ secrets.GITHUB_TOKEN }}  # GitHub token for authentication
+          publish_branch: gh-pages  # Target branch for GitHub Pages deployment
+          publish_dir: ./docs/build/  # Directory containing the built documentation
+          user_name: github-actions[bot]  # Username for the commit
+          user_email: github-actions[bot]@users.noreply.github.com  # Email for the commit
 
-      # - name: Debug Output
-      #   run: |
-      #     pwd  # Print the current working directory
-      #     ls -l  # List files in the build directory
-      #     cat ./source/conf.py  # Show Sphinx config file for debugging
-      #   working-directory: ${{ github.workspace }}/docs/build
+# Uncomment below for debugging purposes
+#      - name: Debug Output
+#        run: |
+#          pwd  # Print the current working directory
+#          ls -l ./build/  # List files in the build directory
+#          cat ./source/conf.py  # Display the Sphinx configuration file
+#        working-directory: ${{ github.workspace }}/docs
diff --git a/docs/Makefile b/docs/Makefile
@@ -34,8 +34,11 @@ apidoc:
 	@python $(SOURCEDIR)/remove_string.py
 	@echo "Removing duplicated files"
 	@python $(SOURCEDIR)/remove_files.py
-
-
+	@echo "Renaming and updating file"
+	@python $(SOURCEDIR)/change_api_file_name.py
+	# @echo "Renaming and updating file"
+	# @python $(SOURCEDIR)/change_api_file_name_autosummary.py
+
 
 html: apidoc
 	@$(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/source/apis/index.rst b/docs/source/apis/index.rst
@@ -10,26 +10,28 @@ Core
 The core section of the LightRAG API documentation provides detailed information about the foundational components of the LightRAG system. These components are essential for the basic operations and serve as the building blocks for higher-level functionalities.
 
 .. autosummary::
-
+   core.component
    core.base_data_class
+   core.default_prompt_template
    core.model_client
-   core.component
-   core.data_components
+
+   .. core.data_components
+
    core.db
-   core.default_prompt_template
-   core.embedder
    core.functional
+
    core.generator
-   core.memory
-   core.parameter
-   core.prompt_builder
-   core.retriever
    core.string_parser
+   core.embedder
+   core.retriever
+   .. core.memory
+
+   core.prompt_builder
    core.tokenizer
    core.func_tool
    core.tool_manager
    core.types
-
+   core.parameter
 
 Components
 -----------
@@ -38,13 +40,28 @@ The components section of the LightRAG API documentation outlines the detailed s
 
 .. autosummary::
 
-   components.agent
-   components.model_client
-   componnets.data_process
-   .. components.reasoning
-
-   components.retriever
-   components.output_parsers
+   components.agent.react
+
+   components.model_client.anthropic_client
+   components.model_client.cohere_client
+   components.model_client.google_client
+   components.model_client.groq_client
+   components.model_client.openai_client
+   components.model_client.transformers_client
+   components.model_client.utils
+
+   components.data_process.data_components
+   components.data_process.text_splitter
+
+   components.reasoning.chain_of_thought
+
+   components.retriever.bm25_retriever
+   components.retriever.faiss_retriever
+   components.retriever.llm_retriever
+   components.retriever.postgres_retriever
+   components.retriever.reranker_retriever
+
+   components.output_parsers.outputs
 
 
 Evaluation

diff --git a/docs/source/change_api_file_name.py b/docs/source/change_api_file_name.py
@@ -0,0 +1,51 @@
+import os
+import re
+
+def update_file_content(directory: str):
+    module_name = directory.split("/")[-1] if "_autosummary" not in directory else "components"
+    # print(f"directory: {directory}; module_name {module_name}")
+    for filename in os.listdir(directory):
+        # print(filename)
+        if filename.endswith(".rst") and "index" not in filename:
+            filepath = os.path.join(directory, filename)
+            # print(filepath)
+            with open(filepath, "r+", encoding='utf-8') as file:
+                lines = file.readlines()
+                modified = False  # To track if modifications have been made
+                for i in range(len(lines) - 1):
+                    line = lines[i].strip()
+                    next_line = lines[i + 1].strip()
+
+                    # Check if the next line is a title underline
+                    if next_line == "=" * len(next_line) and not modified:
+                        # Check if the current line starts with the module_name
+                        if line.startswith(module_name):
+                            # Replace the full module path with only the last segment
+                            new_title = line.split('.')[-1]
+                            # print(f"new_title: {new_title}")
+                            lines[i] = new_title + '\n'  # Update the title line
+                            modified = True  # Mark that modification has been made
+                            # No need to break since we are preserving the rest of the content
+
+                # Rewind and update the file only if modifications were made
+                if modified:
+                    file.seek(0)
+                    file.writelines(lines)
+                    file.truncate()  # Ensure the file is cut off at the new end if it's shorter
+                    print(f"Updated {filepath}")
+
+
+
+if __name__ == "__main__":
+    # Specify the directory or directories you want to process
+    directories = [
+        "./source/apis/core",
+        "./source/apis/components",
+        "./source/apis/utils",
+        "./source/apis/eval",
+        "./source/apis/tracing",
+        "./source/apis/optim",
+        # "./source/apis/components/_autosummary",
+    ]
+    for diretory in directories:
+        update_file_content(diretory)   
diff --git a/docs/source/remove_files.py b/docs/source/remove_files.py
@@ -40,12 +40,18 @@ def remove_file(directory: str):
         "components.retriever.bm25_retriever.rst",
         "components.model_client.google_client.rst",
         "components.model_client.transformers_client.rst",
+        "components.model_client.utils.rst",
         "components.retriever.llm_retriever.rst",
         "components.agent.react.rst",
         "components.model_client.anthropic_client.rst",
         "components.output_parsers.outputs.rst",
         "components.model_client.cohere_client.rst",
-        "components.retriever.reranker_retriever.rst",
+        "components.retriever.reranker_retriever.rst",   
+        "components.data_process.data_components.rst",
+        "components.data_process.text_splitter.rst",
+        "components.memory.memory.rst",
+        "components.retriever.postgres_retriever.rst"
+
     ]
     try:
         for filename in os.listdir(directory):

diff --git a/lightrag/lightrag/components/reasoning/chain_of_thought.py b/lightrag/lightrag/components/reasoning/chain_of_thought.py
@@ -1,4 +1,6 @@
 """
+Chain of Thought (CoT) mimics a step-by-step thought process for arriving at the answer.
+
 https://arxiv.org/abs/2201.11903, published in Jan, 2023
 
 Chain of the thought(CoT) is to mimic a step-by-step thought process for arriving at the answer. You can achieve it in two ways:

diff --git a/lightrag/lightrag/core/component.py b/lightrag/lightrag/core/component.py
@@ -1,3 +1,5 @@
+"""Component is to LLM task pipelines what nn.Module is to PyTorch models."""
+
 from collections import OrderedDict, namedtuple
 from typing import (
     Callable,

diff --git a/lightrag/lightrag/core/default_prompt_template.py b/lightrag/lightrag/core/default_prompt_template.py
@@ -1,3 +1,8 @@
+"""This is the default system prompt template used in LightRAG.
+
+Use the :ref:`Prompt <core-prompt_builder>` class to manage it.
+"""
+
 # TODO: potentially make a data class for this
 LIGHTRAG_DEFAULT_PROMPT_ARGS = [
     "task_desc_str",  # task description
@@ -75,5 +80,5 @@
 """
 """This is the default system prompt template used in the LightRAG.
 
-Use :ref:`Prompt<core-prompt_builder>` class  to manage it.
+Use the :ref:`Prompt <core-prompt_builder>` class to manage it.
 """
diff --git a/lightrag/lightrag/core/generator.py b/lightrag/lightrag/core/generator.py
@@ -1,3 +1,6 @@
+"""Generator is a user-facing orchestration component with a simple and unified interface for LLM prediction.
+
+It is a pipeline that consists of three subcomponents."""
 from typing import Any, Dict, List, Optional, Union
 from copy import deepcopy
 import logging

diff --git a/lightrag/lightrag/core/parameter.py b/lightrag/lightrag/core/parameter.py
@@ -1,3 +1,4 @@
+"""WIP"""
 from typing import Generic, TypeVar, Any
 
 T = TypeVar("T")  # covariant set to False to allow for in-place updates

diff --git a/lightrag/lightrag/core/tokenizer.py b/lightrag/lightrag/core/tokenizer.py
@@ -1,3 +1,6 @@
+"""
+Tokenizer from tiktoken.
+"""
 import tiktoken
 from typing import List
 

diff --git a/lightrag/lightrag/core/tool_manager.py b/lightrag/lightrag/core/tool_manager.py
@@ -1,3 +1,6 @@
+"""
+The ToolManager manages a list of tools, context, and all ways to execute functions.
+"""
 from typing import List, Dict, Optional, Any, Callable, Awaitable, Union
 import logging
 from copy import deepcopy

diff --git a/lightrag/lightrag/optim/llm_optimizer.py b/lightrag/lightrag/optim/llm_optimizer.py
@@ -1,5 +1,7 @@
 r"""
-Based and optimized from ORPO llm optimizer: https://arxiv.org/abs/2309.03409
+Based on and optimized from the OPRO LLM optimizer.
+
+https://arxiv.org/abs/2309.03409
 Source code: https://github.com/google-deepmind/opro
 """
 

diff --git a/poetry.lock b/poetry.lock