Skip to content

Commit

Permalink
MDL-83211 core_search: Indexing can miss items due to race condition
Browse files Browse the repository at this point in the history
Changes search indexing so that (except for test scripts) each indexing
run indexes only items up to 5 seconds ago, and the following indexing
will start from the same point. This ensures all items are indexed,
even if an item is written to the database one or two seconds after its
timecreated value.
  • Loading branch information
sammarshallou committed Nov 15, 2024
1 parent e1d0518 commit d12989b
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 3 deletions.
32 changes: 29 additions & 3 deletions search/classes/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ class manager {
*/
const SEARCH_AREA_CATEGORY_OTHER = 'core-other';

/**
* @var int To avoid race conditions, do not index documents newer than this many seconds.
*/
const INDEXING_DELAY = 5;

/**
* @var \core_search\base[] Enabled search areas.
*/
Expand Down Expand Up @@ -1194,7 +1199,7 @@ public function optimize_index() {
* @return bool Whether there was any updated document or not.
*/
public function index($fullindex = false, $timelimit = 0, ?\progress_trace $progress = null) {
global $DB;
global $DB, $CFG;

// Cannot combine time limit with reindex.
if ($timelimit && $fullindex) {
Expand Down Expand Up @@ -1229,6 +1234,11 @@ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $prog
$stopat = self::get_current_time() + $timelimit;
}

// Work out if we are in test mode, in which case we disable the indexing delay (because
// the normal pattern is to add a document and immediately index it).
$testmode = (PHPUNIT_TEST || defined('BEHAT_TEST')) &&
empty($CFG->searchindexingdelayfortestscript);

foreach ($searchareas as $areaid => $searcharea) {

$progress->output('Processing area: ' . $searcharea->get_visible_name());
Expand All @@ -1244,6 +1254,17 @@ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $prog

$prevtimestart = intval(get_config($componentconfigname, $varname . '_indexingstart'));

// The effective start time of previous indexing was some seconds earlier because we
// only index data up to that time, to avoid race conditions (if it takes a while to
// write a document to the database and the timecreated for that document ends up being
// a second or two out of date). This mechanism is disabled for tests.
if (!$testmode) {
// The -1 here is because for example, if _indexingstart is 123, we will have
// indexed everything up to 123 - 5 = 118 (inclusive). So next time, we can start
// at 119 = 123 - 4 and we don't have to repeat 118.
$prevtimestart -= (self::INDEXING_DELAY - 1);
}

if ($fullindex === true) {
$referencestarttime = 0;

Expand Down Expand Up @@ -1278,8 +1299,13 @@ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $prog
$options['stopat'] = $stopat;
}
$options['progress'] = $progress;
$iterator = new skip_future_documents_iterator(new \core\dml\recordset_walk(
$recordset, array($searcharea, 'get_document'), $options));
// Skip 'future' documents, also any written very recently (to avoid race conditions).
// The exception is for PHPunit and Behat (step 'I update the global search index')
// where we allow it to index recent documents as well, we don't want it to have to wait.
$iterator = new skip_future_documents_iterator(
new \core\dml\recordset_walk($recordset, [$searcharea, 'get_document'], $options),
$indexingstart - ($testmode ? 0 : self::INDEXING_DELAY),
);
$result = $this->engine->add_documents($iterator, $searcharea, $options);
$recordset->close();
$batchinfo = '';
Expand Down
68 changes: 68 additions & 0 deletions search/tests/manager_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -1625,4 +1625,72 @@ public function test_context_deletion(): void {
];
$this->assertEquals($expected, $search->get_engine()->get_and_clear_deletes());
}

/**
* Tests the indexing delay (used to avoid race conditions) in {@see manager::index()}.
*
* @covers \core_search\manager::index
*/
public function test_indexing_delay(): void {
global $USER, $CFG;

$this->resetAfterTest();

// Normally the indexing delay is turned off for test scripts because we don't want to have
// to wait 5 seconds after creating anything to index it and it's not like there will be a
// race condition (indexing doesn't run at same time as adding). This turns it on.
$CFG->searchindexingdelayfortestscript = true;

$this->setAdminUser();

// Create a course and a forum.
$generator = $this->getDataGenerator();
$course = $generator->create_course();
$forum = $generator->create_module('forum', ['course' => $course->id]);

// Skip ahead 5 seconds so everything gets indexed.
$now = time();
$now += manager::INDEXING_DELAY;
$search = \testable_core_search::instance();
$search->fake_current_time($now);
$search->index();
$search->get_engine()->get_and_clear_added_documents();

// Basic discussion data.
$basicdata = [
'course' => $course->id,
'forum' => $forum->id,
'userid' => $USER->id,
];
// Discussion so old it's prior to indexing delay (not realistic).
$generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata,
['timemodified' => $now - manager::INDEXING_DELAY, 'name' => 'Frog']));
// Discussion just within indexing delay (simulates if it took a while to add to database).
$generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata,
['timemodified' => $now - (manager::INDEXING_DELAY - 1), 'name' => 'Toad']));
// Move time along a bit.
$now += 100;
$search->fake_current_time($now);
// Discussion that happened 5 seconds before the new now.
$generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata,
['timemodified' => $now - (manager::INDEXING_DELAY), 'name' => 'Zombie']));
// This one only happened 4 seconds before so it shouldn't be indexed yet.
$generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata,
['timemodified' => $now - (manager::INDEXING_DELAY - 1), 'name' => 'Werewolf']));

// Reindex and check that it added the middle two discussions.
$search->index();
$added = $search->get_engine()->get_and_clear_added_documents();
$this->assertCount(2, $added);
$this->assertEquals('Toad', $added[0]->get('title'));
$this->assertEquals('Zombie', $added[1]->get('title'));

// Move time forwards a couple of seconds and now the last one will get indexed.
$now += 2;
$search->fake_current_time($now);
$search->index();
$added = $search->get_engine()->get_and_clear_added_documents();
$this->assertCount(1, $added);
$this->assertEquals('Werewolf', $added[0]->get('title'));
}
}

0 comments on commit d12989b

Please sign in to comment.