From 569794c79ccabefd1909ff89dfcfce1abc34bdd7 Mon Sep 17 00:00:00 2001 From: Weves Date: Tue, 14 Jan 2025 16:57:18 -0800 Subject: [PATCH] Avoid double scan --- backend/onyx/background/indexing/run_indexing.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py index b0ee39883ae..6516647f555 100644 --- a/backend/onyx/background/indexing/run_indexing.py +++ b/backend/onyx/background/indexing/run_indexing.py @@ -123,11 +123,8 @@ def strip_null_characters(doc_batch: list[Document]) -> list[Document]: ) section.link = section.link.replace("\x00", "") - if section.text and "\x00" in section.text: - logger.warning( - f"NUL characters found in document text for document: {cleaned_doc.id}" - ) - section.text = section.text.replace("\x00", "") + # since text can be longer, just replace to avoid double scan + section.text = section.text.replace("\x00", "") cleaned_batch.append(cleaned_doc)