From b3c5698f3615205133fa318d519345d7ce53d77f Mon Sep 17 00:00:00 2001 From: Alleria Date: Sun, 30 Jun 2024 14:58:03 -0700 Subject: [PATCH 1/4] update the toml file for version control --- pyproject.toml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e2b148d06..47d753539 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,8 @@ [tool.poetry] -name = "lightrag-project" -version = "0.1.0" +name = "lightrag" +version = "0.0.0-alpha.2" +readme = "README.md" description = "A project to develop and test the lightrag library" authors = ["Your Name "] license = "MIT" @@ -11,8 +12,8 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.11, <4.0" -lightrag = { path = "./lightrag", develop = true } +python = ">=3.10, <4.0" +# lightrag = { path = "./lightrag", develop = true } torch = "^2.3.1" flagembedding = "^1.2.10" # cohere = "^5.5.7" From b6b3c7000ff57947bbcfb3c0fed17136b82ac383 Mon Sep 17 00:00:00 2001 From: Alleria Date: Sun, 30 Jun 2024 15:01:51 -0700 Subject: [PATCH 2/4] update the toml file for version control --- poetry.lock | 115 +++++++++++++++++++++++----------------------------- 1 file changed, 50 insertions(+), 65 deletions(-) diff --git a/poetry.lock b/poetry.lock index db7926658..fb07fbd45 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "accelerate" @@ -136,6 +136,7 @@ files = [ [package.dependencies] aiosignal = ">=1.1.2" +async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" @@ -217,8 +218,10 @@ files = [ ] [package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] @@ -341,6 +344,9 @@ files = [ {file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} + [[package]] name = "async-timeout" version = "4.0.3" @@ -442,17 +448,6 @@ files = [ [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] -[[package]] -name = "backoff" -version = "2.2.1" -description = "Function decoration for backoff and retry" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] - [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -1079,6 +1074,20 @@ files = [ {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, ] +[[package]] +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", 
hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "executing" version = "2.0.1" @@ -1420,8 +1429,14 @@ files = [ [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" -grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} -grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} +grpcio = [ + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, +] +grpcio-status = [ + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, +] proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -1903,6 +1918,7 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} decorator = "*" +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} jedi = ">=0.16" matplotlib-inline = "*" pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} @@ -2100,20 +2116,6 @@ files = [ {file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"}, ] -[[package]] -name = "jsonlines" -version = "4.0.0" -description = "Library with helpers for the jsonlines file format" -optional = false -python-versions = ">=3.8" -files = [ - {file = "jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55"}, - {file = "jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74"}, -] - -[package.dependencies] -attrs = ">=19.2.0" - [[package]] name = "jsonpatch" version = "1.33" @@ -2401,6 +2403,7 @@ jupyterlab-server = ">=2.27.1,<3" notebook-shim = ">=0.2" packaging = "*" setuptools = ">=40.1.0" +tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""} tornado = ">=6.2.0" traitlets = "*" @@ -2584,6 +2587,7 @@ files = [ [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" +async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} langchain-core = ">=0.2.10,<0.3.0" langchain-text-splitters = ">=0.2.0,<0.3.0" langsmith = ">=0.1.17,<0.2.0" @@ -2715,27 +2719,6 @@ cli = ["fire"] docs = ["requests (>=2.0.0)"] typing = ["mypy (>=1.0.0)", "types-setuptools"] -[[package]] -name = "lightrag" -version = "0.1.0" -description = "The 'PyTorch' library for LLM applications. RAG=Retriever-Agent-Generator." 
-optional = false -python-versions = ">=3.10, <4.0" -files = [] -develop = true - -[package.dependencies] -backoff = "^2.2.1" -jinja2 = "^3.1.3" -jsonlines = "^4.0.0" -numpy = "^1.26.4" -python-dotenv = "^1.0.1" -tiktoken = "^0.7.0" - -[package.source] -type = "directory" -url = "lightrag" - [[package]] name = "llama-cloud" version = "0.0.6" @@ -3905,6 +3888,7 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] @@ -4659,6 +4643,9 @@ files = [ {file = "pypdf-4.2.0.tar.gz", hash = "sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1"}, ] +[package.dependencies] +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + [package.extras] crypto = ["PyCryptodome", "cryptography"] dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] @@ -4680,20 +4667,6 @@ files = [ [package.dependencies] six = ">=1.5" -[[package]] -name = "python-dotenv" -version = "1.0.1" -description = "Read key-value pairs from a .env file and set them as environment variables" -optional = false -python-versions = ">=3.8" -files = [ - {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, - {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, -] - -[package.extras] -cli = ["click (>=5.0)"] - [[package]] name = "python-json-logger" version = "2.0.7" @@ -5625,6 +5598,7 @@ sphinxcontrib-htmlhelp = ">=2.0.0" sphinxcontrib-jsmath = "*" sphinxcontrib-qthelp = "*" sphinxcontrib-serializinghtml = ">=1.1.9" +tomli = {version = ">=2", markers = "python_version < \"3.11\""} [package.extras] docs = ["sphinxcontrib-websupport"] @@ -6145,6 +6119,17 @@ dev = ["tokenizers[testing]"] docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "torch" version = "2.3.1" @@ -6863,5 +6848,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = ">=3.11, <4.0" -content-hash = "0be4e6e956c9ecb269263f17c19dfa5506ae361a3a999c8e11fdae1ad46b3bcf" +python-versions = ">=3.10, <4.0" +content-hash = "c9b91c9447663f7ba92c1c89f5db9a016214295c6d3a63f80fa48728b27e0df2" From 145c1b2b7075f6f127fe1f721a7ac4297c642268 Mon Sep 17 00:00:00 2001 From: Alleria Date: Sun, 30 Jun 2024 15:03:34 -0700 Subject: [PATCH 3/4] update test --- lightrag/tests/test_transformer_client.py | 191 +++++++++++----------- 1 file changed, 93 insertions(+), 98 deletions(-) diff --git a/lightrag/tests/test_transformer_client.py b/lightrag/tests/test_transformer_client.py index cdbc1931d..166980e74 100644 --- a/lightrag/tests/test_transformer_client.py +++ b/lightrag/tests/test_transformer_client.py @@ -23,106 +23,101 @@ def setUp(self) -> None: "The red panda (Ailurus fulgens), also called the lesser panda, the red bear-cat, and the red cat-bear, is 
a mammal native to the eastern Himalayas and southwestern China.", ] - # def test_transformer_embedder(self): - # transformer_embedder_model = "thenlper/gte-base" - # transformer_embedder_model_component = TransformerEmbedder( - # model_name=transformer_embedder_model - # ) - # print( - # f"Testing transformer embedder with model {transformer_embedder_model_component}" - # ) - # print("Testing transformer embedder") - # output = transformer_embedder_model_component( - # model=transformer_embedder_model, input="Hello world" - # ) - # print(output) - - # def test_transformer_client(self): - # transformer_client = TransformersClient() - # print("Testing transformer client") - # # run the model - # kwargs = { - # "model": "thenlper/gte-base", - # # "mock": False, - # } - # api_kwargs = transformer_client.convert_inputs_to_api_kwargs( - # input="Hello world", - # model_kwargs=kwargs, - # model_type=ModelType.EMBEDDER, - # ) - # # print(api_kwargs) - # output = transformer_client.call( - # api_kwargs=api_kwargs, model_type=ModelType.EMBEDDER - # ) - - # # print(transformer_client) - # # print(output) - - # def test_transformer_reranker(self): - # transformer_reranker_model = "BAAI/bge-reranker-base" - # transformer_reranker_model_component = TransformerReranker() - # # print( - # # f"Testing transformer reranker with model {transformer_reranker_model_component}" - # # ) - - # model_kwargs = { - # "model": transformer_reranker_model, - # "documents": self.documents, - # "query": self.query, - # "top_k": 2, - # } - - # output = transformer_reranker_model_component( - # **model_kwargs, - # ) - # # assert output is a list of float with length 2 - # self.assertEqual(len(output), 2) - # self.assertEqual(type(output[0]), float) - - # def test_transformer_reranker_client(self): - # transformer_reranker_client = TransformersClient( - # model_name="BAAI/bge-reranker-base" - # ) - # print("Testing transformer reranker client") - # # run the model - # kwargs = { - # "model": "BAAI/bge-reranker-base", - # "documents": self.documents, - # "top_k": 2, - # } - # api_kwargs = transformer_reranker_client.convert_inputs_to_api_kwargs( - # input=self.query, - # model_kwargs=kwargs, - # model_type=ModelType.RERANKER, - # ) - # print(api_kwargs) - # self.assertEqual(api_kwargs["model"], "BAAI/bge-reranker-base") - # output = transformer_reranker_client.call( - # api_kwargs=api_kwargs, model_type=ModelType.RERANKER - # ) - # self.assertEqual(type(output), tuple) - - - # def test_transformer_llm_response(self): - # """Test the TransformerLLM model with zephyr-7b-beta for generating a response.""" - # transformer_llm_model = "HuggingFaceH4/zephyr-7b-beta" - # transformer_llm_model_component = TransformerLLM(model_name=transformer_llm_model) + def test_transformer_embedder(self): + transformer_embedder_model = "thenlper/gte-base" + transformer_embedder_model_component = TransformerEmbedder( + model_name=transformer_embedder_model + ) + print( + f"Testing transformer embedder with model {transformer_embedder_model_component}" + ) + print("Testing transformer embedder") + output = transformer_embedder_model_component( + model=transformer_embedder_model, input="Hello world" + ) + print(output) + + def test_transformer_client(self): + transformer_client = TransformersClient() + print("Testing transformer client") + # run the model + kwargs = { + "model": "thenlper/gte-base", + # "mock": False, + } + api_kwargs = transformer_client.convert_inputs_to_api_kwargs( + input="Hello world", + model_kwargs=kwargs, + 
model_type=ModelType.EMBEDDER, + ) + # print(api_kwargs) + output = transformer_client.call( + api_kwargs=api_kwargs, model_type=ModelType.EMBEDDER + ) + + # print(transformer_client) + # print(output) + + def test_transformer_reranker(self): + transformer_reranker_model = "BAAI/bge-reranker-base" + transformer_reranker_model_component = TransformerReranker() + # print( + # f"Testing transformer reranker with model {transformer_reranker_model_component}" + # ) + + model_kwargs = { + "model": transformer_reranker_model, + "documents": self.documents, + "query": self.query, + "top_k": 2, + } + + output = transformer_reranker_model_component( + **model_kwargs, + ) + # assert output is a list of float with length 2 + self.assertEqual(len(output), 2) + self.assertEqual(type(output[0]), float) + + def test_transformer_reranker_client(self): + transformer_reranker_client = TransformersClient( + model_name="BAAI/bge-reranker-base" + ) + print("Testing transformer reranker client") + # run the model + kwargs = { + "model": "BAAI/bge-reranker-base", + "documents": self.documents, + "top_k": 2, + } + api_kwargs = transformer_reranker_client.convert_inputs_to_api_kwargs( + input=self.query, + model_kwargs=kwargs, + model_type=ModelType.RERANKER, + ) + print(api_kwargs) + self.assertEqual(api_kwargs["model"], "BAAI/bge-reranker-base") + output = transformer_reranker_client.call( + api_kwargs=api_kwargs, model_type=ModelType.RERANKER + ) + self.assertEqual(type(output), tuple) + + + def test_transformer_llm_response(self): + """Test the TransformerLLM model with zephyr-7b-beta for generating a response.""" + transformer_llm_model = "HuggingFaceH4/zephyr-7b-beta" + transformer_llm_model_component = TransformerLLM(model_name=transformer_llm_model) - # # Define a sample input - # input_text = "Hello, what's the weather today?" + # Define a sample input + input_text = "Hello, what's the weather today?" 
-    # # Test generating a response, providing the 'model' keyword
-    # # response = transformer_llm_model_component(input=input_text, model=transformer_llm_model)
-    # response = transformer_llm_model_component(input_text=input_text)
-
+        response = transformer_llm_model_component(input_text=input_text)
 
-    # # Check if the response is valid
-    # self.assertIsInstance(response, str, "The response should be a string.")
-    # self.assertTrue(len(response) > 0, "The response should not be empty.")
+        # Check if the response is valid
+        self.assertIsInstance(response, str, "The response should be a string.")
+        self.assertTrue(len(response) > 0, "The response should not be empty.")
 
-    # # Optionally, print the response for visual verification during testing
-    # print(f"Generated response: {response}")
+        # Optionally, print the response for visual verification during testing
+        print(f"Generated response: {response}")
 
 if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 632e1b1156fd9f42db8afe94d56ac1273a0af6df Mon Sep 17 00:00:00 2001
From: Alleria
Date: Sun, 30 Jun 2024 15:37:31 -0700
Subject: [PATCH 4/4] sync the names

---
 lightrag/components/agent/__init__.py | 4 ++--
 lightrag/components/agent/react.py    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lightrag/components/agent/__init__.py b/lightrag/components/agent/__init__.py
index 5f5c58449..796b004ce 100644
--- a/lightrag/components/agent/__init__.py
+++ b/lightrag/components/agent/__init__.py
@@ -1,8 +1,8 @@
-from .react import DEFAULT_REACT_AGENT_SYSTEM_PROMPT, ReactAgent
+from .react import DEFAULT_REACT_AGENT_SYSTEM_PROMPT, ReActAgent
 
 from lightrag.utils.registry import EntityMapping
 
 __all__ = [
-    "ReactAgent",
+    "ReActAgent",
     "DEFAULT_REACT_AGENT_SYSTEM_PROMPT",
 ]
 
diff --git a/lightrag/components/agent/react.py b/lightrag/components/agent/react.py
index 55d5cb627..7b45c865a 100644
--- a/lightrag/components/agent/react.py
+++ b/lightrag/components/agent/react.py
@@ -164,7 +164,7 @@ def __init__(
             func=self._finish,
             answer="final answer: 'answer'",
         )
-        output_parser = JsonOutputParser(data_class=ouput_data_class, example=example)
+        output_parser = JsonOutputParser(data_class=ouput_data_class, examples=example)
         prompt_kwargs = {
             "tools": self.tool_manager.yaml_definitions,
             "output_format_str": output_parser.format_instructions(),
@@ -320,7 +320,7 @@ def _extra_repr(self) -> str:
 
 if __name__ == "__main__":
-    from components.model_client import GroqAPIClient
+    from lightrag.components.model_client import GroqAPIClient
     from lightrag.core.types import ModelClientType
     from lightrag.utils import setup_env  # noqa
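
Note on PATCH 4: renaming ReactAgent to ReActAgent is a breaking change for
any downstream code that imports the old name. A minimal before/after sketch
of the required update -- the constructor arguments below are illustrative
assumptions, not taken from this diff:

    # before this series: from lightrag.components.agent import ReactAgent
    from lightrag.components.agent import ReActAgent
    from lightrag.components.model_client import GroqAPIClient  # client named in react.py's __main__ block

    agent = ReActAgent(
        tools=[],  # no tools registered in this minimal sketch
        model_client=GroqAPIClient(),
        model_kwargs={"model": "llama3-70b-8192"},  # hypothetical model choice
    )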
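
Note on PATCHes 1 and 2: widening python from ">=3.11, <4.0" to ">=3.10, <4.0"
is what pulls the conditional backports (tomli, exceptiongroup, async-timeout,
typing-extensions) into poetry.lock under python_version < "3.11" markers. As a
sketch of the pattern those backports serve -- illustrative only; it assumes a
consumer that parses pyproject.toml itself, which nothing in the diff requires:

    import sys

    if sys.version_info >= (3, 11):
        import tomllib  # stdlib TOML parser, new in Python 3.11
    else:
        import tomli as tomllib  # PyPI backport, locked here only for 3.10 envs

    with open("pyproject.toml", "rb") as f:  # tomllib/tomli require binary mode
        config = tomllib.load(f)
    print(config["tool"]["poetry"]["version"])  # "0.0.0-alpha.2" per PATCH 1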