diff --git a/knowledge_storm/__init__.py b/knowledge_storm/__init__.py index 7a55e52a..f93158a7 100644 --- a/knowledge_storm/__init__.py +++ b/knowledge_storm/__init__.py @@ -7,4 +7,4 @@ from .utils import * from .dataclass import * -__version__ = "1.0.0" \ No newline at end of file +__version__ = "1.0.0" diff --git a/knowledge_storm/collaborative_storm/engine.py b/knowledge_storm/collaborative_storm/engine.py index 2a68a3dc..01684b9b 100644 --- a/knowledge_storm/collaborative_storm/engine.py +++ b/knowledge_storm/collaborative_storm/engine.py @@ -296,19 +296,20 @@ class TurnPolicySpec: Represents the policy specifications for determining the behavior of a conversation turn. Attributes: - should_reorganize_knowledge_base (bool): - A flag that indicates whether the knowledge base should be reorganized after the current turn. - - should_update_experts_list (bool): + should_reorganize_knowledge_base (bool): + A flag that indicates whether the knowledge base should be reorganized after the current turn. + + should_update_experts_list (bool): A flag that indicates whether the list of experts should be updated based on the conversation context. - - should_polish_utterance (bool): + + should_polish_utterance (bool): A flag that indicates whether the generated utterance should be polished (e.g., refined or rephrased) before it is used in the conversation. - - agent (Agent): - The `Agent` responsible for generating utterances or responses during the conversation turn. - This agent interacts with the knowledge base and the conversation history to produce responses. + + agent (Agent): + The `Agent` responsible for generating utterances or responses during the conversation turn. + This agent interacts with the knowledge base and the conversation history to produce responses. 
""" + should_reorganize_knowledge_base: bool = False should_update_experts_list: bool = False should_polish_utterance: bool = False @@ -626,7 +627,7 @@ def warm_start(self): def generate_report(self) -> str: """ - Generate report leveraging organized collected information in the knowledge base (i.e. mind map). + Generate report leveraging organized collected information in the knowledge base (i.e. mind map). The article generation follows the paradigm in STORM paper, where it considers mind map nodes as section names, and generate the report section by section. Returns: @@ -650,8 +651,8 @@ def step( """ Yields a single turn in the conversation flow. - This method take a user input when user choose to inject an utterance or generates the next system utterance based on the current conversation history and defined discourse policies. - It handles updating the conversation history, managing expert lists, and interacting with the knowledge base. + This method take a user input when user choose to inject an utterance or generates the next system utterance based on the current conversation history and defined discourse policies. + It handles updating the conversation history, managing expert lists, and interacting with the knowledge base. Additionally, it logs each stage of the conversation for monitoring and debugging purposes. Args: @@ -665,13 +666,13 @@ def step( Workflow: 1. User Utterance Handling - If `user_utterance` is provided, it is appended to the `conversation_history` - + 2. System Utterance Generation - If no `user_utterance` is provided, the method proceeds to generate the next system utterance. - Determines the next turn policy by consulting the `discourse_manager` with the current conversation history. - Generates a new utterance using the agent defined in the turn policy, leveraging the `knowledge_base` and `conversation_history`. 
- If the turn policy indicates that the experts list should be updated, it updates the expert list based on the latest utterances. - + 4. Knowledge Base Update - Inserts the new turn into the `knowledge_base`, optionally allowing the creation of new nodes or inserting under the root based on the `rag_only_baseline_mode` flag. - If the turn policy specifies, it reorganizes the `knowledge_base` to maintain optimal structure and relevance. diff --git a/knowledge_storm/collaborative_storm/modules/co_storm_agents.py b/knowledge_storm/collaborative_storm/modules/co_storm_agents.py index 3fbe122d..afc138d4 100644 --- a/knowledge_storm/collaborative_storm/modules/co_storm_agents.py +++ b/knowledge_storm/collaborative_storm/modules/co_storm_agents.py @@ -24,7 +24,7 @@ class CoStormExpert(Agent): """ Represents an expert agent in the Co-STORM framework. - The `CoStormExpert` is a specialized type of `Agent` that is tasked with participating in roundtable discussions within the Co-STORM system. + The `CoStormExpert` is a specialized type of `Agent` that is tasked with participating in roundtable discussions within the Co-STORM system. The expert uses language models to generate action plans, answer questions, and polish its utterances based on the current conversation history and knowledge base. It interacts with modules for action planning and question answering grounding on provided retrieval models. @@ -38,6 +38,7 @@ class CoStormExpert(Agent): rm (Optional[dspy.Retrieve], optional): A retrieval module used for fetching external knowledge or context. 
callback_handler (BaseCallbackHandler, optional): Handles log message printing """ + def __init__( self, topic: str, @@ -112,6 +113,7 @@ class SimulatedUser(Agent): For more information, please refer to Section 3.4 of Co-STORM paper: https://www.arxiv.org/pdf/2408.15232 """ + def __init__( self, topic: str, @@ -155,13 +157,14 @@ def generate_utterance( class Moderator(Agent): """ - The moderator's role in the Co-STORM framework is to inject new perspectives into the conversation to avoid stagnation, repetition, or overly niche discussions. - This is achieved by generating questions based on unused, uncited snippets of information retrieved since the last moderator's turn. - The selected information is reranked according to its relevance to the conversation topic and its dissimilarity to the original question. + The moderator's role in the Co-STORM framework is to inject new perspectives into the conversation to avoid stagnation, repetition, or overly niche discussions. + This is achieved by generating questions based on unused, uncited snippets of information retrieved since the last moderator's turn. + The selected information is reranked according to its relevance to the conversation topic and its dissimilarity to the original question. The resulting top-ranked snippets are used to generate an informed question to be presented to the conversation participants. For more information, please refer to Section 3.5 of Co-STORM paper: https://www.arxiv.org/pdf/2408.15232 """ + def __init__( self, topic: str, @@ -319,6 +322,7 @@ class PureRAGAgent(Agent): It does not utilize any other information besides query itself. It's designed for Co-STORM paper baseline comparison. 
""" + def __init__( self, topic: str, diff --git a/knowledge_storm/collaborative_storm/modules/grounded_question_generation.py b/knowledge_storm/collaborative_storm/modules/grounded_question_generation.py index 8f810d9d..331692ca 100644 --- a/knowledge_storm/collaborative_storm/modules/grounded_question_generation.py +++ b/knowledge_storm/collaborative_storm/modules/grounded_question_generation.py @@ -6,6 +6,7 @@ For more detailed information, refer to Section 3.5 of the Co-STORM paper: https://www.arxiv.org/pdf/2408.15232. """ + import dspy from typing import List, Union diff --git a/knowledge_storm/dataclass.py b/knowledge_storm/dataclass.py index 153b9617..fd981905 100644 --- a/knowledge_storm/dataclass.py +++ b/knowledge_storm/dataclass.py @@ -292,14 +292,14 @@ class KnowledgeBase: """ Represents the dynamic, hierarchical mind map used in Co-STORM to track and organize discourse. - The knowledge base serves as a shared conceptual space between the user and the system, allowing for effective collaboration by reducing the user's cognitive load and ensuring that the discourse is easy to follow. - + The knowledge base serves as a shared conceptual space between the user and the system, allowing for effective collaboration by reducing the user's cognitive load and ensuring that the discourse is easy to follow. + The knowledge base is structured as a tree (or mind map) that dynamically organizes collected information and concepts as the conversation progresses. - The mind map consists of concepts (nodes) and edges that represent parent-child relationships among topics. Each concept is linked to retrieved information, + The mind map consists of concepts (nodes) and edges that represent parent-child relationships among topics. Each concept is linked to retrieved information, which is placed under the most appropriate concept based on its associated question and semantic similarity. 
- For more details, please refer to Section 3.2 of Co-STORM paper: https://www.arxiv.org/pdf/2408.15232 + For more details, please refer to Section 3.2 of Co-STORM paper: https://www.arxiv.org/pdf/2408.15232 Attributes: root (KnowledgeNode): The root node of the hierarchical knowledge base, representing the top-level concept. @@ -827,14 +827,14 @@ def get_knowledge_base_summary(self): def reogranize(self): """ - Reorganizes the knowledge base through two main processes: top-down expansion and bottom-up cleaning. + Reorganizes the knowledge base through two main processes: top-down expansion and bottom-up cleaning. - The reorganization process ensures that the knowledge base remains well-structured and relevant as new information is added. It consists of the following steps: - 1.Top-Down Expansion: Expands nodes that have accumulated significant amounts of information by creating subtopics, - ensuring that each concept remains specific and manageable. - 2.Bottom-Up Cleaning: Cleans the knowledge base by removing empty leaf nodes (nodes with no supporting information) - and merging nodes that have only a single child, simplifying the structure and maintaining clarity. - """ + The reorganization process ensures that the knowledge base remains well-structured and relevant as new information is added. It consists of the following steps: + 1. Top-Down Expansion: Expands nodes that have accumulated significant amounts of information by creating subtopics, + ensuring that each concept remains specific and manageable. + 2. Bottom-Up Cleaning: Cleans the knowledge base by removing empty leaf nodes (nodes with no supporting information) + and merging nodes that have only a single child, simplifying the structure and maintaining clarity. 
+ """ # pre-processing self.trim_empty_leaf_nodes() self.merge_single_child_nodes() diff --git a/knowledge_storm/interface.py b/knowledge_storm/interface.py index f5727d57..5922602f 100644 --- a/knowledge_storm/interface.py +++ b/knowledge_storm/interface.py @@ -567,13 +567,13 @@ class Agent(ABC): """ Interface for STORM and Co-STORM LLM agent - This class must be implemented by any subclass of `Agent` to define how the agent generates an utterance. - The generated utterance can be influenced by the conversation history, knowledge base, and any additional parameters passed via `kwargs`. + This class must be implemented by any subclass of `Agent` to define how the agent generates an utterance. + The generated utterance can be influenced by the conversation history, knowledge base, and any additional parameters passed via `kwargs`. The implementation should align with the specific role and perspective of the agent, as defined by the agent's topic, role name, and role description. Args: - knowledge_base (KnowledgeBase): The current knowledge base (e.g., mind map in Co-STORM) that contains the accumulated information relevant to the conversation. - conversation_history (List[ConversationTurn]): A list of past conversation turns, providing context for generating the next utterance. + knowledge_base (KnowledgeBase): The current knowledge base (e.g., mind map in Co-STORM) that contains the accumulated information relevant to the conversation. + conversation_history (List[ConversationTurn]): A list of past conversation turns, providing context for generating the next utterance. The agent can refer to this history to maintain continuity and relevance in the conversation. logging_wrapper (LoggingWrapper): A wrapper used for logging important events during the utterance generation process. **kwargs: Additional arguments that can be passed to the method for more specialized utterance generation behavior depending on the agent's specific implementation. 
@@ -585,6 +585,7 @@ class Agent(ABC): - Subclasses of `Agent` should define the exact strategy for generating the utterance, which could involve interacting with a language model, retrieving relevant knowledge, or following specific conversational policies. - The agent's role, perspective, and the knowledge base content will influence how the utterance is formulated. """ + from .dataclass import KnowledgeBase, ConversationTurn def __init__(self, topic: str, role_name: str, role_description: str):