Skip to content

Commit

Permalink
update the tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Alleria1809 committed Jun 30, 2024
1 parent bf8adf5 commit b655780
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 137 deletions.
137 changes: 0 additions & 137 deletions lightrag/tests/test_gt_text_splitter.py

This file was deleted.

63 changes: 63 additions & 0 deletions lightrag/tests/test_text_splitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

import unittest
from lightrag.core.types import Document
from lightrag.components.data_process.text_splitter import TextSplitter

class TestTextSplitter(unittest.TestCase):
    """Unit tests for ``TextSplitter`` argument validation and chunking."""

    def setUp(self):
        """Create the shared word-level splitter used by most tests."""
        # Five words per chunk, with two words shared between neighbours.
        self.splitter = TextSplitter(split_by="word", chunk_size=5, chunk_overlap=2)

    def test_invalid_split_by(self):
        """An unrecognised ``split_by`` value must raise ``ValueError``."""
        with self.assertRaises(ValueError):
            TextSplitter(split_by="invalid", chunk_size=5, chunk_overlap=0)

    def test_negative_chunk_size(self):
        """A negative ``chunk_size`` must raise ``ValueError``."""
        with self.assertRaises(ValueError):
            TextSplitter(split_by="word", chunk_size=-1, chunk_overlap=0)

    def test_negative_chunk_overlap(self):
        """A negative ``chunk_overlap`` must raise ``ValueError``."""
        with self.assertRaises(ValueError):
            TextSplitter(split_by="word", chunk_size=5, chunk_overlap=-1)

    def test_split_by_word(self):
        """A text of exactly ``chunk_size`` words is expected as one chunk."""
        text = "This is a simple test"
        expected = ["This is a simple test"]
        result = self.splitter.split_text(text)
        self.assertEqual(result, expected)

    def test_split_by_sentence(self):
        """Sentence splitting with ``chunk_size=1`` yields one sentence per chunk."""
        splitter = TextSplitter(split_by="sentence", chunk_size=1, chunk_overlap=0)
        text = "This is a test. It should work well."
        # The second expected chunk keeps the space that followed the first period.
        expected = ["This is a test.", " It should work well."]
        result = splitter.split_text(text)
        self.assertEqual(result, expected)

    def test_overlap_handling(self):
        """The last two words of a chunk are repeated at the start of the next."""
        text = "one two three four five six seven"
        # The first expected chunk ends with a trailing space; "four five" is the
        # two-word overlap shared by both chunks.
        expected = ["one two three four five ", "four five six seven"]
        result = self.splitter.split_text(text)
        self.assertEqual(result, expected)

    def test_document_splitting(self):
        """``call`` splits each ``Document`` and returns documents carrying the chunks."""
        docs = [Document(text="This is a simple test to check splitting.", id="1")]
        expected_texts = ["This is a simple test ", "simple test to check splitting."]
        result = self.splitter.call(docs)
        # Only the chunk text is compared; other document fields are not checked.
        result_texts = [doc.text for doc in result]
        self.assertEqual(result_texts, expected_texts)

    def test_empty_text_handling(self):
        """A ``Document`` whose ``text`` is ``None`` must make ``call`` raise ``ValueError``."""
        with self.assertRaises(ValueError):
            self.splitter.call([Document(text=None, id="1")])

# Run the suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit b655780

Please sign in to comment.