diff --git a/search/classes/manager.php b/search/classes/manager.php index 907a8913d7408..6751a415a7621 100644 --- a/search/classes/manager.php +++ b/search/classes/manager.php @@ -122,6 +122,11 @@ class manager { */ const SEARCH_AREA_CATEGORY_OTHER = 'core-other'; + /** + * @var int To avoid race conditions, do not index documents newer than this many seconds. + */ + const INDEXING_DELAY = 5; + /** * @var \core_search\base[] Enabled search areas. */ @@ -1194,7 +1199,7 @@ public function optimize_index() { * @return bool Whether there was any updated document or not. */ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $progress = null) { - global $DB; + global $DB, $CFG; // Cannot combine time limit with reindex. if ($timelimit && $fullindex) { @@ -1229,6 +1234,11 @@ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $prog $stopat = self::get_current_time() + $timelimit; } + // Work out if we are in test mode, in which case we disable the indexing delay (because + // the normal pattern is to add a document and immediately index it). + $testmode = (PHPUNIT_TEST || defined('BEHAT_TEST')) && + empty($CFG->searchindexingdelayfortestscript); + foreach ($searchareas as $areaid => $searcharea) { $progress->output('Processing area: ' . $searcharea->get_visible_name()); @@ -1244,6 +1254,17 @@ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $prog $prevtimestart = intval(get_config($componentconfigname, $varname . '_indexingstart')); + // The effective start time of previous indexing was some seconds earlier because we + // only index data up to that time, to avoid race conditions (if it takes a while to + // write a document to the database and the timecreated for that document ends up being + // a second or two out of date). This mechanism is disabled for tests. + if (!$testmode) { + // The -1 here is because for example, if _indexingstart is 123, we will have + // indexed everything up to 123 - 5 = 118 (inclusive). So next time, we can start + // at 119 = 123 - 4 and we don't have to repeat 118. + $prevtimestart -= (self::INDEXING_DELAY - 1); + } + if ($fullindex === true) { $referencestarttime = 0; @@ -1278,8 +1299,13 @@ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $prog $options['stopat'] = $stopat; } $options['progress'] = $progress; - $iterator = new skip_future_documents_iterator(new \core\dml\recordset_walk( - $recordset, array($searcharea, 'get_document'), $options)); + // Skip 'future' documents, also any written very recently (to avoid race conditions). + // The exception is for PHPunit and Behat (step 'I update the global search index') + // where we allow it to index recent documents as well, we don't want it to have to wait. + $iterator = new skip_future_documents_iterator( + new \core\dml\recordset_walk($recordset, [$searcharea, 'get_document'], $options), + $indexingstart - ($testmode ? 0 : self::INDEXING_DELAY), + ); $result = $this->engine->add_documents($iterator, $searcharea, $options); $recordset->close(); $batchinfo = ''; diff --git a/search/tests/manager_test.php b/search/tests/manager_test.php index a24cef371617c..0258d06a36317 100644 --- a/search/tests/manager_test.php +++ b/search/tests/manager_test.php @@ -1625,4 +1625,72 @@ public function test_context_deletion(): void { ]; $this->assertEquals($expected, $search->get_engine()->get_and_clear_deletes()); } + + /** + * Tests the indexing delay (used to avoid race conditions) in {@see manager::index()}. + * + * @covers \core_search\manager::index + */ + public function test_indexing_delay(): void { + global $USER, $CFG; + + $this->resetAfterTest(); + + // Normally the indexing delay is turned off for test scripts because we don't want to have + // to wait 5 seconds after creating anything to index it and it's not like there will be a + // race condition (indexing doesn't run at same time as adding). This turns it on. + $CFG->searchindexingdelayfortestscript = true; + + $this->setAdminUser(); + + // Create a course and a forum. + $generator = $this->getDataGenerator(); + $course = $generator->create_course(); + $forum = $generator->create_module('forum', ['course' => $course->id]); + + // Skip ahead 5 seconds so everything gets indexed. + $now = time(); + $now += manager::INDEXING_DELAY; + $search = \testable_core_search::instance(); + $search->fake_current_time($now); + $search->index(); + $search->get_engine()->get_and_clear_added_documents(); + + // Basic discussion data. + $basicdata = [ + 'course' => $course->id, + 'forum' => $forum->id, + 'userid' => $USER->id, + ]; + // Discussion so old it's prior to indexing delay (not realistic). + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - manager::INDEXING_DELAY, 'name' => 'Frog'])); + // Discussion just within indexing delay (simulates if it took a while to add to database). + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - (manager::INDEXING_DELAY - 1), 'name' => 'Toad'])); + // Move time along a bit. + $now += 100; + $search->fake_current_time($now); + // Discussion that happened 5 seconds before the new now. + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - (manager::INDEXING_DELAY), 'name' => 'Zombie'])); + // This one only happened 4 seconds before so it shouldn't be indexed yet. + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - (manager::INDEXING_DELAY - 1), 'name' => 'Werewolf'])); + + // Reindex and check that it added the middle two discussions. + $search->index(); + $added = $search->get_engine()->get_and_clear_added_documents(); + $this->assertCount(2, $added); + $this->assertEquals('Toad', $added[0]->get('title')); + $this->assertEquals('Zombie', $added[1]->get('title')); + + // Move time forwards a couple of seconds and now the last one will get indexed. + $now += 2; + $search->fake_current_time($now); + $search->index(); + $added = $search->get_engine()->get_and_clear_added_documents(); + $this->assertCount(1, $added); + $this->assertEquals('Werewolf', $added[0]->get('title')); + } }