-
Notifications
You must be signed in to change notification settings - Fork 218
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bf8adf5
commit b655780
Showing
2 changed files
with
63 additions
and
137 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
|
||
import unittest | ||
from lightrag.core.types import Document | ||
from lightrag.components.data_process.text_splitter import TextSplitter | ||
|
||
class TestTextSplitter(unittest.TestCase): | ||
|
||
def setUp(self): | ||
# Set up a TextSplitter instance before each test | ||
self.splitter = TextSplitter(split_by="word", chunk_size=5, chunk_overlap=2) | ||
|
||
def test_invalid_split_by(self): | ||
# Test initialization with invalid split_by value | ||
with self.assertRaises(ValueError): | ||
TextSplitter(split_by="invalid", chunk_size=5, chunk_overlap=0) | ||
|
||
def test_negative_chunk_size(self): | ||
# Test initialization with negative chunk_size | ||
with self.assertRaises(ValueError): | ||
TextSplitter(split_by="word", chunk_size=-1, chunk_overlap=0) | ||
|
||
def test_negative_chunk_overlap(self): | ||
# Test initialization with negative chunk_overlap | ||
with self.assertRaises(ValueError): | ||
TextSplitter(split_by="word", chunk_size=5, chunk_overlap=-1) | ||
|
||
def test_split_by_word(self): | ||
# Test the basic functionality of splitting by word | ||
text = "This is a simple test" | ||
expected = ["This is a simple test"] | ||
result = self.splitter.split_text(text) | ||
self.assertEqual(result, expected) | ||
|
||
def test_split_by_sentence(self): | ||
# Test splitting by sentence | ||
splitter = TextSplitter(split_by="sentence", chunk_size=1, chunk_overlap=0) | ||
text = "This is a test. It should work well." | ||
expected = ["This is a test.", " It should work well."] | ||
result = splitter.split_text(text) | ||
self.assertEqual(result, expected) | ||
|
||
def test_overlap_handling(self): | ||
# Test proper handling of overlap | ||
text = "one two three four five six seven" | ||
expected = ["one two three four five ", "four five six seven"] | ||
result = self.splitter.split_text(text) | ||
self.assertEqual(result, expected) | ||
|
||
def test_document_splitting(self): | ||
# Test splitting a list of documents | ||
docs = [Document(text="This is a simple test to check splitting.", id="1")] | ||
expected_texts = ["This is a simple test ", "simple test to check splitting."] | ||
result = self.splitter.call(docs) | ||
result_texts = [doc.text for doc in result] | ||
self.assertEqual(result_texts, expected_texts) | ||
|
||
def test_empty_text_handling(self): | ||
# Test handling of empty text | ||
with self.assertRaises(ValueError): | ||
self.splitter.call([Document(text=None, id="1")]) | ||
|
||
if __name__ == '__main__': | ||
unittest.main() |