diff --git a/src/cpp/src/generation_handle.cpp b/src/cpp/src/generation_handle.cpp index d9fe91ee86..0bd96cc56a 100644 --- a/src/cpp/src/generation_handle.cpp +++ b/src/cpp/src/generation_handle.cpp @@ -17,7 +17,7 @@ GenerationStatus GenerationHandleImpl::get_status() { } bool GenerationHandleImpl::can_read() { - return m_generation_stream->can_read(); + return !is_dropped() && m_generation_stream->can_read(); } bool GenerationHandleImpl::is_dropped() { @@ -55,7 +55,7 @@ std::vector GenerationHandleImpl::read_all() { std::vector results; std::unordered_map partial_results; // We iterate until generation is running or there are tokens we haven't read yet - while (!is_dropped() && (get_status() == GenerationStatus::RUNNING || can_read())) { + while (get_status() == GenerationStatus::RUNNING || can_read()) { // For unary case there's only one iteration and we get all results in a single read() call std::unordered_map iteration_results = read(); add_partial_result(partial_results, iteration_results);