Skip to content

Commit

Permalink
Make sure ConcurrentApproximatePriorityQueue#poll never returns `null` on a non-empty queue.
Browse files Browse the repository at this point in the history

Before this change, `ConcurrentApproximatePriorityQueue#poll` could sometimes
return `null` even though the queue was never empty at any point in time. The
practical implication is that we can end up with more DWPTs in memory than
indexing threads, which, while not strictly a bug, may require doing more
merging than we'd like later on.

I ran luceneutil's `IndexGeonames` with this change, and
`ConcurrentApproximatePriorityQueue#poll` was not the main source of
contention. I instrumented the code to check how many DWPTs got pulled from the
queue using the optimistic path vs. pessimistic path and got 8525598 for the
optimistic path vs. 12247 for the pessimistic path.

Closes apache#12649 apache#12916
  • Loading branch information
jpountz committed Dec 21, 2023
1 parent 91002d0 commit 6c8f8e1
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.lucene.index;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Predicate;
Expand Down Expand Up @@ -116,20 +118,32 @@ T poll(Predicate<T> predicate) {
}
}
}
for (int i = 0; i < concurrency; ++i) {
final int index = (threadHash + i) % concurrency;
final Lock lock = locks[index];
final ApproximatePriorityQueue<T> queue = queues[index];
lock.lock();
try {

// We want to make sure we return a non-null entry if this queue is not empty. This requires us
// to not release locks until we're done, otherwise if there is a single non-empty sub queue, as
// we iterate through all sub queues, there is a chance that an entry gets added to a queue we
// just checked and that the existing entry gets removed from a queue we haven't checked yet.
// This would make this method return `null` even though the queue was empty at no point in
// time.

final List<Lock> toUnlock = new ArrayList<>();
try {
for (int index = 0; index < concurrency; ++index) {
final Lock lock = locks[index];
final ApproximatePriorityQueue<T> queue = queues[index];
lock.lock();
toUnlock.add(lock);
T entry = queue.poll(predicate);
if (entry != null) {
return entry;
}
} finally {
}
} finally {
for (Lock lock : toUnlock) {
lock.unlock();
}
}

return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,41 @@ public void run() {
assertEquals(Integer.valueOf(3), pq.poll(x -> true));
assertNull(pq.poll(x -> true));
}

/**
 * Checks that {@code poll} never yields {@code null} while the queue holds at least one entry.
 *
 * <p>For a randomized number of rounds, a queue with a random concurrency (1 to 16) is shared by
 * a random number of threads (2 to 16). Every thread first adds one entry and then, 1,000 times,
 * polls an entry, asserts that it is non-null, and puts it back. Because a thread only polls
 * after having added (or re-added) an entry, the queue always contains at least one entry when
 * a poll starts.
 */
public void testNeverReturnNullOnNonEmptyQueue() throws Exception {
  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; ++round) {
    final int concurrency = TestUtil.nextInt(random(), 1, 16);
    final ConcurrentApproximatePriorityQueue<Integer> pq =
        new ConcurrentApproximatePriorityQueue<>(concurrency);
    final int threadCount = TestUtil.nextInt(random(), 2, 16);
    // All workers block on this latch so that they start hammering the queue together.
    final CountDownLatch startingGun = new CountDownLatch(1);

    final Runnable worker =
        () -> {
          try {
            startingGun.await();
          } catch (InterruptedException e) {
            throw new ThreadInterruptedException(e);
          }
          // Seed the queue with this thread's own entry; the value doubles as its weight.
          Integer value = TestUtil.nextInt(random(), 0, 100);
          pq.add(value, value);
          int remaining = 1_000;
          while (remaining-- > 0) {
            value = pq.poll(x -> true);
            assertNotNull(value);
            // Put the entry back so the queue is non-empty for the next poll.
            pq.add(value, value);
          }
        };

    final Thread[] workers = new Thread[threadCount];
    for (int t = 0; t < workers.length; ++t) {
      workers[t] = new Thread(worker);
    }
    for (Thread w : workers) {
      w.start();
    }
    startingGun.countDown();
    for (Thread w : workers) {
      w.join();
    }
  }
}
}

0 comments on commit 6c8f8e1

Please sign in to comment.