From 39f7f73db2530c8c81755e3d465d8291ff010169 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Fri, 5 Apr 2024 09:53:46 -0700 Subject: [PATCH] Support non-json serializable objects in LLMContext (#1589) * Contains breaking changes for the C++ API * No breaking changes for the Python API * Updates `nlohmann_json from` 3.9 to 3.11 for `patch_inplace` method. * Requires nv-morpheus/MRC#455 to be merged first Closes #1560 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1589 --- .../all_cuda-121_arch-x86_64.yaml | 2 +- .../dev_cuda-121_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- morpheus.code-workspace | 28 +++++ .../_lib/include/morpheus/llm/llm_context.hpp | 39 ++++--- .../_lib/include/morpheus/llm/llm_engine.hpp | 5 +- .../include/morpheus/llm/llm_lambda_node.hpp | 4 +- .../_lib/include/morpheus/pybind11/json.hpp | 35 +++--- morpheus/_lib/llm/__init__.pyi | 4 +- morpheus/_lib/llm/include/py_llm_engine.hpp | 2 +- .../_lib/llm/include/py_llm_lambda_node.hpp | 2 +- morpheus/_lib/llm/module.cpp | 28 ++--- morpheus/_lib/llm/src/py_llm_engine.cpp | 2 +- morpheus/_lib/llm/src/py_llm_lambda_node.cpp | 13 +-- morpheus/_lib/llm/src/py_llm_task_handler.cpp | 3 +- morpheus/_lib/src/llm/llm_context.cpp | 96 ++++++++------- morpheus/_lib/src/llm/llm_engine.cpp | 5 +- morpheus/_lib/tests/llm/test_llm_context.cpp | 110 +++++++++--------- morpheus/_lib/tests/llm/test_llm_engine.cpp | 7 +- morpheus/_lib/tests/llm/test_llm_node.cpp | 11 +- .../_lib/tests/llm/test_llm_node_runner.cpp | 11 +- .../llm/test_llm_task_handler_runner.cpp | 12 +- morpheus/llm/nodes/langchain_agent_node.py | 7 +- tests/llm/nodes/test_langchain_agent_node.py | 2 +- .../task_handlers/test_simple_task_handler.py | 5 +- 25 files changed, 238 insertions(+), 199 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 6918fb0d05..fdc3194a9c 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -63,7 +63,7 @@ dependencies: - networkx=2.8.8 - newspaper3k=0.2 - ninja=1.11 -- nlohmann_json=3.9 +- nlohmann_json=3.11 - nodejs=18.* - numexpr - numpydoc=1.5 diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index bf045f359a..cd479f4108 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -49,7 +49,7 @@ dependencies: - nbsphinx - networkx=2.8.8 - ninja=1.11 -- nlohmann_json=3.9 +- nlohmann_json=3.11 - nodejs=18.* - numpydoc=1.5 - nvtabular=23.08.00 diff --git a/dependencies.yaml b/dependencies.yaml index d19cb844bd..6ba18860d0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -181,7 +181,7 @@ dependencies: - librdkafka>=1.9.2,<1.10.0a0 - mrc=24.03 - ninja=1.11 - - nlohmann_json=3.9 + - nlohmann_json=3.11 - pkg-config=0.29 # for mrc cmake - protobuf=4.24 - pybind11-stubgen=0.10.5 diff --git a/morpheus.code-workspace b/morpheus.code-workspace index 4e0da64a64..49801e0482 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -585,6 +585,34 @@ }, "type": "cppdbg" }, + { + "MIMode": "gdb", + "args": [ + // Uncomment to run a specific test + // "--gtest_filter='TestLLMContext.PopSelectMultipleOutputs'" + ], + "cwd": "${workspaceFolder}", + "environment": [ + { + "name": "GLOG_v", + "value": "10" + } + ], + "externalConsole": false, + "miDebuggerPath": "gdb", + "name": "Debug LLM C++ Tests", + "program": "${workspaceFolder}/build/morpheus/_lib/tests/test_llm.x", + "request": "launch", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "ignoreFailures": true, + "text": "-enable-pretty-printing" + } + ], + "stopAtEntry": false, + "type": "cppdbg" + }, { "args": [ "--input_file=validation.csv", diff --git a/morpheus/_lib/include/morpheus/llm/llm_context.hpp b/morpheus/_lib/include/morpheus/llm/llm_context.hpp index 6f6deb2063..908a39a84b 100644 --- a/morpheus/_lib/include/morpheus/llm/llm_context.hpp +++ b/morpheus/_lib/include/morpheus/llm/llm_context.hpp @@ -18,13 +18,12 @@ #pragma once #include "morpheus/export.h" +#include "morpheus/llm/fwd.hpp" // for ControlMessage #include "morpheus/llm/input_map.hpp" #include "morpheus/llm/llm_task.hpp" -#include "morpheus/messages/control.hpp" -#include "morpheus/utilities/json_types.hpp" #include -#include +#include #include #include @@ -36,7 +35,6 @@ struct LLMContextState { LLMTask task; std::shared_ptr message; - nlohmann::json values; }; /** @@ -110,11 +108,11 @@ class MORPHEUS_EXPORT LLMContext : public std::enable_shared_from_this& message() const; /** - * @brief Get all output mappings for this context. + * @brief Get all outputs for this context. * - * @return nlohmann::json::const_reference + * @return const mrc::pymrc::JSONValues& */ - nlohmann::json::const_reference all_outputs() const; + const mrc::pymrc::JSONValues& all_outputs() const; /** * @brief Get full name of context containing parents up to root. @@ -142,31 +140,31 @@ class MORPHEUS_EXPORT LLMContext : public std::enable_shared_from_this m_parent{nullptr}; std::string m_name; input_mappings_t m_inputs; @@ -195,7 +200,7 @@ class MORPHEUS_EXPORT LLMContext : public std::enable_shared_from_this m_state; - nlohmann::json m_outputs; + mrc::pymrc::JSONValues m_outputs; mrc::Promise m_outputs_promise; mrc::SharedFuture m_outputs_future; diff --git a/morpheus/_lib/include/morpheus/llm/llm_engine.hpp b/morpheus/_lib/include/morpheus/llm/llm_engine.hpp index 0527920ee1..31651c5c94 100644 --- a/morpheus/_lib/include/morpheus/llm/llm_engine.hpp +++ b/morpheus/_lib/include/morpheus/llm/llm_engine.hpp @@ -18,12 +18,9 @@ #pragma once #include "morpheus/export.h" -#include "morpheus/llm/fwd.hpp" +#include "morpheus/llm/fwd.hpp" // for ControlMessage, LLMContext, LLMTask, LLMTaskHandler, LLMTaskHandlerRunner #include "morpheus/llm/input_map.hpp" -#include "morpheus/llm/llm_context.hpp" #include "morpheus/llm/llm_node.hpp" -#include "morpheus/llm/llm_task_handler.hpp" -#include "morpheus/messages/control.hpp" #include "morpheus/types.hpp" #include diff --git a/morpheus/_lib/include/morpheus/llm/llm_lambda_node.hpp b/morpheus/_lib/include/morpheus/llm/llm_lambda_node.hpp index e1d4a8d8fe..34da27f68a 100644 --- a/morpheus/_lib/include/morpheus/llm/llm_lambda_node.hpp +++ b/morpheus/_lib/include/morpheus/llm/llm_lambda_node.hpp @@ -78,7 +78,7 @@ class LLMLambdaNode : public LLMNodeBase { const auto& arg = context->get_input(); - auto output = co_await this->m_function(arg.get>()); + auto output = co_await this->m_function(arg.view_json().get>()); nlohmann::json outputs_json = std::move(output); @@ -91,7 +91,7 @@ class LLMLambdaNode : public LLMNodeBase { auto args = context->get_inputs(); - auto outputs = co_await this->m_function(args); + auto outputs = co_await this->m_function(args.view_json()); nlohmann::json outputs_json = std::move(outputs); diff --git a/morpheus/_lib/include/morpheus/pybind11/json.hpp b/morpheus/_lib/include/morpheus/pybind11/json.hpp index ae87c0c8cd..69d3f2ac3e 100644 --- a/morpheus/_lib/include/morpheus/pybind11/json.hpp +++ b/morpheus/_lib/include/morpheus/pybind11/json.hpp @@ -35,31 +35,36 @@ struct type_caster { public: /** - * This macro establishes the name 'inty' in - * function signatures and declares a local variable - * 'value' of type inty + * This macro establishes a local variable 'value' of type nlohmann::json */ PYBIND11_TYPE_CASTER(nlohmann::json, _("object")); /** - * Conversion part 1 (Python->C++): convert a PyObject into a inty + * Conversion part 1 (Python->C++): convert a PyObject into an nlohmann::json * instance or return false upon failure. The second argument * indicates whether implicit conversions should be applied. */ bool load(handle src, bool convert) { - if (!src || src.is_none()) + if (!src) { return false; } - value = mrc::pymrc::cast_from_pyobject(pybind11::reinterpret_borrow(src)); + if (src.is_none()) + { + value = nlohmann::json(nullptr); + } + else + { + value = mrc::pymrc::cast_from_pyobject(pybind11::reinterpret_borrow(src)); + } return true; } /** - * Conversion part 2 (C++ -> Python): convert an inty instance into + * Conversion part 2 (C++ -> Python): convert an nlohmann::json instance into * a Python object. The second and third arguments are used to * indicate the return value policy and parent object (for * ``return_value_policy::reference_internal``) and are generally @@ -76,14 +81,12 @@ struct type_caster { public: /** - * This macro establishes the name 'inty' in - * function signatures and declares a local variable - * 'value' of type inty + * This macro establishes a local variable 'value' of type nlohmann::json_dict */ PYBIND11_TYPE_CASTER(nlohmann::json_dict, _("dict[str, typing.Any]")); /** - * Conversion part 1 (Python->C++): convert a PyObject into a inty + * Conversion part 1 (Python->C++): convert a PyObject into an nlohmann::json_dict * instance or return false upon failure. The second argument * indicates whether implicit conversions should be applied. */ @@ -106,7 +109,7 @@ struct type_caster } /** - * Conversion part 2 (C++ -> Python): convert an inty instance into + * Conversion part 2 (C++ -> Python): convert an nlohmann::json_dict instance into * a Python object. The second and third arguments are used to * indicate the return value policy and parent object (for * ``return_value_policy::reference_internal``) and are generally @@ -123,14 +126,12 @@ struct type_caster { public: /** - * This macro establishes the name 'inty' in - * function signatures and declares a local variable - * 'value' of type inty + * This macro establishes a local variable 'value' of type nlohmann::json_list */ PYBIND11_TYPE_CASTER(nlohmann::json_list, _("list[typing.Any]")); /** - * Conversion part 1 (Python->C++): convert a PyObject into a inty + * Conversion part 1 (Python->C++): convert a PyObject into an nlohmann::json_list * instance or return false upon failure. The second argument * indicates whether implicit conversions should be applied. */ @@ -153,7 +154,7 @@ struct type_caster } /** - * Conversion part 2 (C++ -> Python): convert an inty instance into + * Conversion part 2 (C++ -> Python): convert an nlohmann::json_list instance into * a Python object. The second and third arguments are used to * indicate the return value policy and parent object (for * ``return_value_policy::reference_internal``) and are generally diff --git a/morpheus/_lib/llm/__init__.pyi b/morpheus/_lib/llm/__init__.pyi index 110b133e4e..5f39468755 100644 --- a/morpheus/_lib/llm/__init__.pyi +++ b/morpheus/_lib/llm/__init__.pyi @@ -67,7 +67,7 @@ class LLMContext(): def get_input(self) -> object: ... @typing.overload def get_input(self, node_name: str) -> object: ... - def get_inputs(self) -> dict: ... + def get_inputs(self) -> object: ... def message(self) -> morpheus._lib.messages.ControlMessage: ... def push(self, name: str, inputs: typing.List[InputMap]) -> LLMContext: ... @typing.overload @@ -210,7 +210,7 @@ class LLMTaskHandler(): def __init__(self) -> None: ... def get_input_names(self) -> typing.List[str]: """ - Get the input names for the task handler. + Get the input names for the task handler. Returns ------- diff --git a/morpheus/_lib/llm/include/py_llm_engine.hpp b/morpheus/_lib/llm/include/py_llm_engine.hpp index dcf3f18a67..39772cb72f 100644 --- a/morpheus/_lib/llm/include/py_llm_engine.hpp +++ b/morpheus/_lib/llm/include/py_llm_engine.hpp @@ -19,7 +19,7 @@ #include "py_llm_node.hpp" -#include "morpheus/llm/fwd.hpp" +#include "morpheus/llm/fwd.hpp" // for LLMTaskHandler, ControlMessage, LLMContext, LLMEngine, LLMTask #include "morpheus/llm/input_map.hpp" #include "morpheus/llm/llm_engine.hpp" diff --git a/morpheus/_lib/llm/include/py_llm_lambda_node.hpp b/morpheus/_lib/llm/include/py_llm_lambda_node.hpp index a0b9543064..f4c38d70ac 100644 --- a/morpheus/_lib/llm/include/py_llm_lambda_node.hpp +++ b/morpheus/_lib/llm/include/py_llm_lambda_node.hpp @@ -17,7 +17,7 @@ #pragma once -#include "morpheus/llm/llm_context.hpp" +#include "morpheus/llm/fwd.hpp" // for LLMContext #include "morpheus/llm/llm_node_base.hpp" #include "morpheus/types.hpp" diff --git a/morpheus/_lib/llm/module.cpp b/morpheus/_lib/llm/module.cpp index decac4cd0b..a356d6b504 100644 --- a/morpheus/_lib/llm/module.cpp +++ b/morpheus/_lib/llm/module.cpp @@ -32,30 +32,28 @@ #include "morpheus/llm/llm_task_handler.hpp" #include "morpheus/messages/control.hpp" // IWYU pragma: keep #include "morpheus/pybind11/input_map.hpp" // IWYU pragma: keep -#include "morpheus/pybind11/json.hpp" // IWYU pragma: keep #include "morpheus/utilities/cudf_util.hpp" -#include "morpheus/utilities/json_types.hpp" #include "morpheus/version.hpp" +#include // for Object, ObjectProperties #include #include -#include -#include -#include #include // IWYU pragma: keep #include // for arg, init, class_, module_, str_attr_accessor, PYBIND11_MODULE, pybind11 -#include // for dict, sequence #include // IWYU pragma: keep #include // IWYU pragma: keep -#include // for pymrc::import +#include // for JSONValues +#include // for pymrc::import #include #include #include #include #include -#include #include +// IWYU pragma: no_include "morpheus/llm/fwd.hpp" +// IWYU pragma: no_include +// IWYU pragma: no_include namespace morpheus::llm { namespace py = pybind11; @@ -199,14 +197,10 @@ PYBIND11_MODULE(llm, _module) .def("get_input", py::overload_cast(&LLMContext::get_input, py::const_), py::arg("node_name")) - .def("get_inputs", - [](LLMContext& self) { - // Convert the return value - return mrc::pymrc::cast_from_json(self.get_inputs()).cast(); - }) - .def("set_output", py::overload_cast(&LLMContext::set_output), py::arg("outputs")) + .def("get_inputs", &LLMContext::get_inputs) + .def("set_output", py::overload_cast(&LLMContext::set_output), py::arg("outputs")) .def("set_output", - py::overload_cast(&LLMContext::set_output), + py::overload_cast(&LLMContext::set_output), py::arg("output_name"), py::arg("output")) .def("push", &LLMContext::push, py::arg("name"), py::arg("inputs")); @@ -301,8 +295,8 @@ PYBIND11_MODULE(llm, _module) .def("get_input_names", &LLMTaskHandler::get_input_names, R"pbdoc( - Get the input names for the task handler. - + Get the input names for the task handler. + Returns ------- list[str] diff --git a/morpheus/_lib/llm/src/py_llm_engine.cpp b/morpheus/_lib/llm/src/py_llm_engine.cpp index 5e6267456d..5f392a38df 100644 --- a/morpheus/_lib/llm/src/py_llm_engine.cpp +++ b/morpheus/_lib/llm/src/py_llm_engine.cpp @@ -19,7 +19,7 @@ #include "py_llm_task_handler.hpp" -#include "morpheus/llm/llm_task_handler.hpp" +#include "morpheus/llm/llm_task_handler.hpp" // for LLMTaskHandler #include diff --git a/morpheus/_lib/llm/src/py_llm_lambda_node.cpp b/morpheus/_lib/llm/src/py_llm_lambda_node.cpp index 5a3b89e1e7..13b5cf4606 100644 --- a/morpheus/_lib/llm/src/py_llm_lambda_node.cpp +++ b/morpheus/_lib/llm/src/py_llm_lambda_node.cpp @@ -19,19 +19,18 @@ #include "pymrc/coro.hpp" -#include "morpheus/llm/llm_context.hpp" +#include "morpheus/llm/llm_context.hpp" // for LLMContext #include "morpheus/llm/llm_node_base.hpp" -#include "morpheus/utilities/json_types.hpp" #include "morpheus/utilities/string_util.hpp" +#include // for DCHECK_EQ #include // IWYU pragma: keep -#include -#include +#include // for PyGILState_Check, gil_scoped_acquire, gil_scoped_release #include #include #include // IWYU pragma: keep #include -#include +#include #include #include @@ -98,7 +97,7 @@ Task> PyLLMLambdaNode::execute(std::shared_ptr> PyLLMLambdaNode::execute(std::shared_ptrset_output(std::move(return_val)); diff --git a/morpheus/_lib/llm/src/py_llm_task_handler.cpp b/morpheus/_lib/llm/src/py_llm_task_handler.cpp index 1fe857c4ae..79884995bd 100644 --- a/morpheus/_lib/llm/src/py_llm_task_handler.cpp +++ b/morpheus/_lib/llm/src/py_llm_task_handler.cpp @@ -17,7 +17,8 @@ #include "py_llm_task_handler.hpp" -#include "morpheus/llm/llm_context.hpp" +#include "morpheus/llm/llm_context.hpp" // IWYU pragma: keep +#include "morpheus/messages/control.hpp" // IWYU pragma: keep #include // IWYU pragma: keep #include diff --git a/morpheus/_lib/src/llm/llm_context.cpp b/morpheus/_lib/src/llm/llm_context.cpp index e4989936bb..5efb6a710e 100644 --- a/morpheus/_lib/src/llm/llm_context.cpp +++ b/morpheus/_lib/src/llm/llm_context.cpp @@ -17,9 +17,10 @@ #include "morpheus/llm/llm_context.hpp" -#include "morpheus/utilities/string_util.hpp" +#include "morpheus/messages/control.hpp" // IWYU pragma: keep #include +#include #include #include #include @@ -77,7 +78,7 @@ std::shared_ptr& LLMContext::message() const return m_state->message; } -nlohmann::json::const_reference LLMContext::all_outputs() const +const mrc::pymrc::JSONValues& LLMContext::all_outputs() const { return m_outputs; } @@ -115,18 +116,18 @@ void LLMContext::pop() else { // Build a new json object with only the specified keys - nlohmann::json new_outputs; + mrc::pymrc::JSONValues new_outputs; for (const auto& output_name : m_output_names) { - new_outputs[output_name] = m_outputs[output_name]; + new_outputs = new_outputs.set_value(output_name, std::move(m_outputs[output_name])); } m_parent->set_output(m_name, std::move(new_outputs)); } } -nlohmann::json::const_reference LLMContext::get_input() const +mrc::pymrc::JSONValues LLMContext::get_input() const { if (m_inputs.size() > 1) { @@ -137,77 +138,72 @@ nlohmann::json::const_reference LLMContext::get_input() const return this->get_input(m_inputs[0].internal_name); } -nlohmann::json::const_reference LLMContext::get_input(const std::string& node_name) const +input_mappings_t::const_iterator LLMContext::find_input(const std::string& node_name, bool throw_if_not_found) const { - if (node_name[0] == '/') - { - nlohmann::json::json_pointer node_json_ptr(node_name); - - if (!m_outputs.contains(node_json_ptr)) - { - throw std::runtime_error(MORPHEUS_CONCAT_STR("Input '" << node_name << "' not found in the output map")); - } + auto found = std::find_if(m_inputs.cbegin(), m_inputs.cend(), [&node_name](const auto& map_iterator) { + return map_iterator.internal_name == node_name; + }); - // Get the value from a sibling output - return m_outputs[node_json_ptr]; - } - else + if (throw_if_not_found && found == m_inputs.cend()) { - // Must be on the parent, so find the mapping between this namespace and the parent - auto found = std::find_if(m_inputs.begin(), m_inputs.end(), [&node_name](const auto& map_iterator) { - return map_iterator.internal_name == node_name; - }); + std::stringstream error_msg; + error_msg << "Input '" << node_name << "' not found in the input list."; - if (found == m_inputs.end()) + if (!m_inputs.empty()) { - std::stringstream error_msg; - error_msg << "Input '" << node_name << "' not found in the input list."; - - if (!m_inputs.empty()) - { - error_msg << " Available inputs are:"; - for (const auto& input : m_inputs) - { - error_msg << " '" << input.internal_name << "'"; - } - } - else + error_msg << " Available inputs are:"; + for (const auto& input : m_inputs) { - error_msg << " Input list is empty."; + error_msg << " '" << input.internal_name << "'"; } - - throw std::runtime_error(error_msg.str()); + } + else + { + error_msg << " Input list is empty."; } - auto& input_name = found->external_name; - - // Get the value from a parent output - return m_parent->get_input(input_name); + throw std::runtime_error(error_msg.str()); } + + return found; } -nlohmann::json LLMContext::get_inputs() const +mrc::pymrc::JSONValues LLMContext::get_input(const std::string& node_name) const { - nlohmann::json inputs = nlohmann::json::object(); + if (node_name[0] == '/') + { + return m_outputs[node_name]; + } + // Must be on the parent, so find the mapping between this namespace and the parent + auto found = find_input(node_name); + auto& input_name = found->external_name; + + // Get the value from a parent output + return m_parent->get_input(input_name); +} + +mrc::pymrc::JSONValues LLMContext::get_inputs() const +{ + mrc::pymrc::JSONValues inputs; for (const auto& in_map : m_inputs) { - inputs[in_map.internal_name] = this->get_input(in_map.internal_name); + auto input_value = this->get_input(in_map.internal_name); + inputs = inputs.set_value(in_map.internal_name, std::move(input_value)); } return inputs; } -void LLMContext::set_output(nlohmann::json outputs) +void LLMContext::set_output(mrc::pymrc::JSONValues&& outputs) { m_outputs = std::move(outputs); - this->outputs_complete(); } -void LLMContext::set_output(const std::string& output_name, nlohmann::json output) +void LLMContext::set_output(const std::string& output_name, mrc::pymrc::JSONValues&& output) { - m_outputs[output_name] = std::move(output); + m_outputs = std::move(m_outputs.set_value(output_name, std::move(output))); } void LLMContext::set_output_names(std::vector output_names) @@ -220,7 +216,7 @@ void LLMContext::outputs_complete() // m_outputs_promise.set_value(); } -nlohmann::json::const_reference LLMContext::view_outputs() const +const mrc::pymrc::JSONValues& LLMContext::view_outputs() const { // // Wait for the outputs to be available // m_outputs_future.wait(); diff --git a/morpheus/_lib/src/llm/llm_engine.cpp b/morpheus/_lib/src/llm/llm_engine.cpp index 14c66a8016..3c29c84676 100644 --- a/morpheus/_lib/src/llm/llm_engine.cpp +++ b/morpheus/_lib/src/llm/llm_engine.cpp @@ -17,9 +17,12 @@ #include "morpheus/llm/llm_engine.hpp" +#include "morpheus/llm/llm_context.hpp" // for LLMContext #include "morpheus/llm/llm_task.hpp" -#include "morpheus/llm/llm_task_handler_runner.hpp" +#include "morpheus/llm/llm_task_handler.hpp" // for LLMTaskHandler +#include "morpheus/llm/llm_task_handler_runner.hpp" // for LLMTaskHandlerRunner #include "morpheus/llm/utils.hpp" +#include "morpheus/messages/control.hpp" // for ControlMessage #include // IWYU pragma: keep #include diff --git a/morpheus/_lib/tests/llm/test_llm_context.cpp b/morpheus/_lib/tests/llm/test_llm_context.cpp index 38438f2e39..0da4317420 100644 --- a/morpheus/_lib/tests/llm/test_llm_context.cpp +++ b/morpheus/_lib/tests/llm/test_llm_context.cpp @@ -18,22 +18,18 @@ #include "../test_utils/common.hpp" // IWYU pragma: associated #include "morpheus/llm/input_map.hpp" -#include "morpheus/llm/llm_context.hpp" -#include "morpheus/llm/llm_lambda_node.hpp" -#include "morpheus/llm/llm_node.hpp" -#include "morpheus/llm/llm_node_runner.hpp" +#include "morpheus/llm/llm_context.hpp" // for LLMContext #include "morpheus/llm/llm_task.hpp" -#include "morpheus/types.hpp" +#include "morpheus/messages/control.hpp" // for ControlMessage #include #include -#include #include +#include // for JSONValues -#include #include #include -#include +// IWYU pragma: no_include "morpheus/llm/fwd.hpp" using namespace morpheus; using namespace morpheus::test; @@ -161,9 +157,10 @@ TEST_F(TestLLMContext, SetOutput) outputs = {{"key1", "val1"}, {"key2", "val2"}}; ctx.set_output(outputs); - ASSERT_EQ(ctx.all_outputs().size(), 2); - ASSERT_EQ(ctx.all_outputs()["key1"], "val1"); - ASSERT_EQ(ctx.all_outputs()["key2"], "val2"); + const auto& json_outputs = ctx.all_outputs().view_json(); + ASSERT_EQ(json_outputs.size(), 2); + ASSERT_EQ(json_outputs["key1"], "val1"); + ASSERT_EQ(json_outputs["key2"], "val2"); } TEST_F(TestLLMContext, SetOutputDict) @@ -173,8 +170,10 @@ TEST_F(TestLLMContext, SetOutputDict) outputs = {{"key1", "val1"}, {"key2", "val2"}}; ctx.set_output("output", outputs); - ASSERT_EQ(ctx.all_outputs()["output"]["key1"], "val1"); - ASSERT_EQ(ctx.all_outputs()["output"]["key2"], "val2"); + + const auto& json_outputs = ctx.all_outputs().view_json(); + ASSERT_EQ(json_outputs["output"]["key1"], "val1"); + ASSERT_EQ(json_outputs["output"]["key2"], "val2"); } TEST_F(TestLLMContext, PushPop) @@ -190,13 +189,15 @@ TEST_F(TestLLMContext, PushPop) nlohmann::json outputs; outputs = {{"key1", "val1"}, {"key2", "val2"}}; child_ctx->set_output(outputs); - ASSERT_EQ(child_ctx->all_outputs()["key1"], "val1"); - ASSERT_EQ(child_ctx->all_outputs()["key2"], "val2"); + + const auto& child_json_outputs = child_ctx->all_outputs().view_json(); + ASSERT_EQ(child_json_outputs["key1"], "val1"); + ASSERT_EQ(child_json_outputs["key2"], "val2"); child_ctx->pop(); - ASSERT_EQ(child_ctx->all_outputs(), nullptr); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"]["key1"], "val1"); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"]["key2"], "val2"); + const auto& parent_json_outputs = child_ctx->parent()->all_outputs().view_json(); + ASSERT_EQ(parent_json_outputs["child"]["key1"], "val1"); + ASSERT_EQ(parent_json_outputs["child"]["key2"], "val2"); } TEST_F(TestLLMContext, PopWithoutPush) @@ -212,13 +213,16 @@ TEST_F(TestLLMContext, PopWithoutPush) nlohmann::json outputs; outputs = {{"key1", "val1"}, {"key2", "val2"}}; child_ctx.set_output(outputs); - ASSERT_EQ(child_ctx.all_outputs()["key1"], "val1"); - ASSERT_EQ(child_ctx.all_outputs()["key2"], "val2"); + + const auto& child_json_outputs = child_ctx.all_outputs().view_json(); + ASSERT_EQ(child_json_outputs["key1"], "val1"); + ASSERT_EQ(child_json_outputs["key2"], "val2"); child_ctx.pop(); - ASSERT_EQ(child_ctx.all_outputs(), nullptr); - ASSERT_EQ(parent_ctx->all_outputs()["child"]["key1"], "val1"); - ASSERT_EQ(parent_ctx->all_outputs()["child"]["key2"], "val2"); + + const auto& parent_json_outputs = parent_ctx->all_outputs().view_json(); + ASSERT_EQ(parent_json_outputs["child"]["key1"], "val1"); + ASSERT_EQ(parent_json_outputs["child"]["key2"], "val2"); } TEST_F(TestLLMContext, PopSelectOneOutput) @@ -230,19 +234,18 @@ TEST_F(TestLLMContext, PopSelectOneOutput) nlohmann::json outputs; outputs = {{"key1", "val1"}, {"key2", "val2"}, {"key3", "val3"}}; child_ctx->set_output(outputs); - ASSERT_EQ(child_ctx->all_outputs()["key1"], "val1"); - ASSERT_EQ(child_ctx->all_outputs()["key2"], "val2"); - ASSERT_EQ(child_ctx->all_outputs()["key3"], "val3"); + + const auto& child_json_outputs = child_ctx->all_outputs().view_json(); + ASSERT_EQ(child_json_outputs["key1"], "val1"); + ASSERT_EQ(child_json_outputs["key2"], "val2"); + ASSERT_EQ(child_json_outputs["key3"], "val3"); child_ctx->set_output_names({"key2"}); child_ctx->pop(); - // std::cerr << child_ctx->all_outputs(); - ASSERT_EQ(child_ctx->all_outputs().size(), 3); - ASSERT_EQ(child_ctx->all_outputs()["key1"], "val1"); - ASSERT_EQ(child_ctx->all_outputs()["key2"], nullptr); - ASSERT_EQ(child_ctx->all_outputs()["key3"], "val3"); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"].size(), 1); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"], "val2"); + + const auto& parent_json_outputs = parent_ctx->all_outputs().view_json(); + ASSERT_EQ(parent_json_outputs["child"].size(), 1); + ASSERT_EQ(parent_json_outputs["child"], "val2"); } TEST_F(TestLLMContext, PopSelectMultipleOutputs) @@ -254,19 +257,19 @@ TEST_F(TestLLMContext, PopSelectMultipleOutputs) nlohmann::json outputs; outputs = {{"key1", "val1"}, {"key2", "val2"}, {"key3", "val3"}}; child_ctx->set_output(outputs); - ASSERT_EQ(child_ctx->all_outputs()["key1"], "val1"); - ASSERT_EQ(child_ctx->all_outputs()["key2"], "val2"); - ASSERT_EQ(child_ctx->all_outputs()["key3"], "val3"); + + const auto& child_json_outputs = child_ctx->all_outputs().view_json(); + ASSERT_EQ(child_json_outputs["key1"], "val1"); + ASSERT_EQ(child_json_outputs["key2"], "val2"); + ASSERT_EQ(child_json_outputs["key3"], "val3"); child_ctx->set_output_names({"key2", "key3"}); child_ctx->pop(); - ASSERT_EQ(child_ctx->all_outputs().size(), 3); - ASSERT_EQ(child_ctx->all_outputs()["key1"], "val1"); - ASSERT_EQ(child_ctx->all_outputs()["key2"], "val2"); - ASSERT_EQ(child_ctx->all_outputs()["key3"], "val3"); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"].size(), 2); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"]["key2"], "val2"); - ASSERT_EQ(child_ctx->parent()->all_outputs()["child"]["key3"], "val3"); + + const auto& parent_json_outputs = child_ctx->parent()->all_outputs().view_json(); + ASSERT_EQ(parent_json_outputs["child"].size(), 2); + ASSERT_EQ(parent_json_outputs["child"]["key2"], "val2"); + ASSERT_EQ(parent_json_outputs["child"]["key3"], "val3"); } TEST_F(TestLLMContext, SingleInputMappingValid) @@ -278,9 +281,9 @@ TEST_F(TestLLMContext, SingleInputMappingValid) auto inputs = llm::input_mappings_t{{"/parent_out", "input1"}}; llm::LLMContext child_ctx{parent_ctx, "child", inputs}; - ASSERT_EQ(child_ctx.get_input(), "val1"); - ASSERT_EQ(child_ctx.get_input("input1"), "val1"); - ASSERT_EQ(child_ctx.get_inputs()["input1"], "val1"); + ASSERT_EQ(child_ctx.get_input().view_json(), "val1"); + ASSERT_EQ(child_ctx.get_input("input1").view_json(), "val1"); + ASSERT_EQ(child_ctx.get_inputs().view_json()["input1"], "val1"); ASSERT_THROW(child_ctx.get_input("input2"), std::runtime_error); } @@ -307,10 +310,13 @@ TEST_F(TestLLMContext, MultipleInputMappingsValid) auto inputs = llm::input_mappings_t{{"/parent_out1", "input1"}, {"/parent_out2", "input2"}}; llm::LLMContext child_ctx{parent_ctx, "child", inputs}; - ASSERT_EQ(child_ctx.get_input("input1"), "val1"); - ASSERT_EQ(child_ctx.get_input("input2"), "val2"); - ASSERT_EQ(child_ctx.get_inputs()["input1"], "val1"); - ASSERT_EQ(child_ctx.get_inputs()["input2"], "val2"); + ASSERT_EQ(child_ctx.get_input("input1").view_json(), "val1"); + ASSERT_EQ(child_ctx.get_input("input2").view_json(), "val2"); + + auto child_inputs = child_ctx.get_inputs().view_json(); + + ASSERT_EQ(child_inputs["input1"], "val1"); + ASSERT_EQ(child_inputs["input2"], "val2"); ASSERT_THROW(child_ctx.get_input(), std::runtime_error); ASSERT_THROW(child_ctx.get_input("input3"), std::runtime_error); } @@ -324,7 +330,7 @@ TEST_F(TestLLMContext, MultipleInputMappingsSingleInvalid) auto inputs = llm::input_mappings_t{{"/parent_out1", "input1"}, {"/invalid", "input2"}}; llm::LLMContext child_ctx{parent_ctx, "child", inputs}; - ASSERT_EQ(child_ctx.get_input("input1"), "val1"); + ASSERT_EQ(child_ctx.get_input("input1").view_json(), "val1"); ASSERT_THROW(child_ctx.get_input("input2"), std::runtime_error); ASSERT_THROW(child_ctx.get_inputs(), std::runtime_error); } @@ -341,4 +347,4 @@ TEST_F(TestLLMContext, MultipleInputMappingsBothInvalid) ASSERT_THROW(child_ctx.get_input("input1"), std::runtime_error); ASSERT_THROW(child_ctx.get_input("input2"), std::runtime_error); ASSERT_THROW(child_ctx.get_inputs(), std::runtime_error); -} \ No newline at end of file +} diff --git a/morpheus/_lib/tests/llm/test_llm_engine.cpp b/morpheus/_lib/tests/llm/test_llm_engine.cpp index 2f9ec60681..52864cc111 100644 --- a/morpheus/_lib/tests/llm/test_llm_engine.cpp +++ b/morpheus/_lib/tests/llm/test_llm_engine.cpp @@ -28,11 +28,13 @@ #include #include #include +#include // for JSONValues #include #include #include #include +// IWYU pragma: no_include "morpheus/llm/fwd.hpp" using namespace morpheus; using namespace morpheus::test; @@ -87,6 +89,7 @@ TEST_F(TestLLMEngine, AsyncTest) EXPECT_TRUE(return_val.is_ready()); EXPECT_EQ(counter, 2); - EXPECT_EQ(out_context->view_outputs()["start"], 123); - EXPECT_EQ(out_context->view_outputs()["test"], 124); + const auto& json_outputs = out_context->view_outputs().view_json(); + EXPECT_EQ(json_outputs["start"], 123); + EXPECT_EQ(json_outputs["test"], 124); } diff --git a/morpheus/_lib/tests/llm/test_llm_node.cpp b/morpheus/_lib/tests/llm/test_llm_node.cpp index d3da618368..5236d3bb5e 100644 --- a/morpheus/_lib/tests/llm/test_llm_node.cpp +++ b/morpheus/_lib/tests/llm/test_llm_node.cpp @@ -17,10 +17,9 @@ #include "../test_utils/common.hpp" // IWYU pragma: associated -#include "morpheus/llm/input_map.hpp" -#include "morpheus/llm/llm_context.hpp" +#include "morpheus/llm/llm_context.hpp" // for LLMContext #include "morpheus/llm/llm_lambda_node.hpp" -#include "morpheus/llm/llm_node.hpp" +#include "morpheus/llm/llm_node.hpp" // for LLMNode #include "morpheus/llm/llm_node_runner.hpp" #include "morpheus/llm/llm_task.hpp" #include "morpheus/types.hpp" @@ -28,11 +27,13 @@ #include #include #include +#include // for JSONValues #include #include #include #include +// IWYU pragma: no_include "morpheus/llm/fwd.hpp" using namespace morpheus; using namespace morpheus::test; @@ -63,7 +64,7 @@ TEST_F(TestLLMNode, NoNodes) auto out_context = coroutines::sync_wait(node.execute(context)); ASSERT_EQ(out_context, context); - ASSERT_EQ(out_context->view_outputs().size(), 0); + ASSERT_EQ(out_context->view_outputs().view_json().size(), 0); } TEST_F(TestLLMNode, AddNode) @@ -162,5 +163,5 @@ TEST_F(TestLLMNode, AddChildNode) auto context = std::make_shared(llm::LLMTask{}, nullptr); auto out_context = coroutines::sync_wait(node.execute(context)); - ASSERT_EQ(out_context->view_outputs()["Root3"], 3); + ASSERT_EQ(out_context->view_outputs().view_json()["Root3"], 3); } diff --git a/morpheus/_lib/tests/llm/test_llm_node_runner.cpp b/morpheus/_lib/tests/llm/test_llm_node_runner.cpp index 70f50b32b5..6662f7ec04 100644 --- a/morpheus/_lib/tests/llm/test_llm_node_runner.cpp +++ b/morpheus/_lib/tests/llm/test_llm_node_runner.cpp @@ -18,9 +18,9 @@ #include "../test_utils/common.hpp" // IWYU pragma: associated #include "morpheus/llm/input_map.hpp" -#include "morpheus/llm/llm_context.hpp" +#include "morpheus/llm/llm_context.hpp" // for LLMContext #include "morpheus/llm/llm_lambda_node.hpp" -#include "morpheus/llm/llm_node.hpp" +#include "morpheus/llm/llm_node.hpp" // for LLMNode #include "morpheus/llm/llm_node_runner.hpp" #include "morpheus/llm/llm_task.hpp" #include "morpheus/types.hpp" @@ -29,11 +29,14 @@ #include #include #include +#include // for JSONValues #include #include #include #include +#include // for tuple +// IWYU pragma: no_include "morpheus/llm/fwd.hpp" using namespace morpheus; using namespace morpheus::test; @@ -134,6 +137,6 @@ TEST_F(TestLLMNodeRunner, Execute) coroutines::sync_wait(coroutines::when_all(runner_1->execute(context), runner_2->execute(context))); - ASSERT_EQ(context->view_outputs()["Root1"], 0); - ASSERT_EQ(context->view_outputs()["Root2"], 1); + ASSERT_EQ(context->view_outputs().view_json()["Root1"], 0); + ASSERT_EQ(context->view_outputs().view_json()["Root2"], 1); } diff --git a/morpheus/_lib/tests/llm/test_llm_task_handler_runner.cpp b/morpheus/_lib/tests/llm/test_llm_task_handler_runner.cpp index ab570704b9..2a0dca7e1e 100644 --- a/morpheus/_lib/tests/llm/test_llm_task_handler_runner.cpp +++ b/morpheus/_lib/tests/llm/test_llm_task_handler_runner.cpp @@ -18,24 +18,24 @@ #include "../test_utils/common.hpp" // IWYU pragma: associated #include "morpheus/llm/input_map.hpp" -#include "morpheus/llm/llm_context.hpp" -#include "morpheus/llm/llm_lambda_node.hpp" -#include "morpheus/llm/llm_node.hpp" -#include "morpheus/llm/llm_node_runner.hpp" +#include "morpheus/llm/llm_context.hpp" // for LLMContext #include "morpheus/llm/llm_task.hpp" #include "morpheus/llm/llm_task_handler.hpp" #include "morpheus/llm/llm_task_handler_runner.hpp" +#include "morpheus/messages/control.hpp" // for ControlMessage #include "morpheus/types.hpp" #include #include #include -#include +#include // IWYU pragma: keep +#include // for nlohmann::json #include #include -#include +#include // for optional #include +// IWYU pragma: no_include "morpheus/llm/fwd.hpp" using namespace morpheus; using namespace morpheus::test; diff --git a/morpheus/llm/nodes/langchain_agent_node.py b/morpheus/llm/nodes/langchain_agent_node.py index ab2671dae8..0980ebfeeb 100644 --- a/morpheus/llm/nodes/langchain_agent_node.py +++ b/morpheus/llm/nodes/langchain_agent_node.py @@ -58,7 +58,7 @@ async def _run_single(self, **kwargs: dict[str, typing.Any]) -> dict[str, typing # Run multiple again results_async = [self._run_single(**x) for x in input_list] - results = await asyncio.gather(*results_async) + results = await asyncio.gather(*results_async, return_exceptions=True) # # Transform from list[dict[str, Any]] to dict[str, list[Any]] # results = {k: [x[k] for x in results] for k in results[0]} @@ -69,9 +69,8 @@ async def _run_single(self, **kwargs: dict[str, typing.Any]) -> dict[str, typing try: return await self._agent_executor.arun(**kwargs) except Exception as e: - error_msg = f"Error running agent: {e}" - logger.exception(error_msg) - return error_msg + logger.exception("Error running agent: %s", e) + return e async def execute(self, context: LLMContext) -> LLMContext: # pylint: disable=invalid-overridden-method diff --git a/tests/llm/nodes/test_langchain_agent_node.py b/tests/llm/nodes/test_langchain_agent_node.py index c3846f8465..7c22b1f14d 100644 --- a/tests/llm/nodes/test_langchain_agent_node.py +++ b/tests/llm/nodes/test_langchain_agent_node.py @@ -142,4 +142,4 @@ def test_execute_error(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMoc return_intermediate_steps=False) node = LangChainAgentNode(agent_executor=agent) - assert execute_node(node, input="input1") == "Error running agent: unittest" + assert isinstance(execute_node(node, input="input1"), RuntimeError) diff --git a/tests/llm/task_handlers/test_simple_task_handler.py b/tests/llm/task_handlers/test_simple_task_handler.py index 3cac5e1d0e..56b633f933 100644 --- a/tests/llm/task_handlers/test_simple_task_handler.py +++ b/tests/llm/task_handlers/test_simple_task_handler.py @@ -21,6 +21,7 @@ from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler from morpheus.messages import ControlMessage from morpheus.messages import MessageMeta +from morpheus.utils.type_aliases import DataFrameType def test_constructor(): @@ -37,7 +38,9 @@ def test_get_input_names(output_columns: list[str] | None, expected_input_names: def test_try_handle(dataset_cudf: DatasetManager): reptiles = ['lizards', 'snakes', 'turtles', 'frogs', 'toads'] - df = dataset_cudf["filter_probs.csv"][0:5] # Take the first 5 rows since there are only have 5 reptiles + + # Take the first 5 rows since there are only have 5 reptiles + df: DataFrameType = dataset_cudf["filter_probs.csv"][0:5] expected_df = df.copy(deep=True) expected_df['reptiles'] = reptiles.copy()