Skip to content

Commit

Permalink
MDL-83119 search_solr: Implement check on connectivity, space usage
Browse files Browse the repository at this point in the history
Implements a status check which confirms that the Solr search engine
is available. Optionally, the check can also show a warning if the
index grows beyond a certain size.

As part of this change, a new API was added in search_solr\engine
to allow using http_client (Guzzle) instead of raw cURL; this makes
it easier to create mock tests in PHPUnit for the new functionality.
  • Loading branch information
sammarshallou committed Nov 15, 2024
1 parent 269a8a8 commit b6a1558
Show file tree
Hide file tree
Showing 7 changed files with 1,037 additions and 4 deletions.
126 changes: 126 additions & 0 deletions search/engine/solr/classes/check/connection.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

namespace search_solr\check;

use core\check\check;
use core\check\result;
use core\output\html_writer;

/**
 * Status check that confirms the connection to Solr works.
 *
 * The check reports an error when the server cannot be reached or the configured index
 * cannot be found. When the 'indexsizelimit' setting is non-empty it also reports a warning
 * once the index exceeds 90% of the limit and an error once it exceeds the limit.
 *
 * @package search_solr
 * @copyright 2024 The Open University
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class connection extends check {
    #[\Override]
    public function get_name(): string {
        return get_string('pluginname', 'search_solr');
    }

    #[\Override]
    public function get_action_link(): ?\action_link {
        return new \action_link(
            new \moodle_url('/admin/settings.php', ['section' => 'searchsolr']),
            get_string('settings'));
    }

    /**
     * Runs the check by asking the search engine for its status.
     *
     * Any exception thrown while obtaining the engine or its status is converted into a
     * 'not connected' result; the stack trace (with the Moodle dirroot removed) is appended
     * to the result details.
     *
     * @return result Check result with status, summary and HTML details
     */
    #[\Override]
    public function get_result(): result {
        global $CFG;

        $result = result::OK;
        $resultstr = '';
        $resultdetails = '';

        try {
            // We do not use manager::instance as this will already try to connect to the engine,
            // we only want to do the specific get_status call below and nothing else. So use
            // search_engine_instance. We know it will be a Solr instance if we got here.
            /** @var \search_solr\engine $engine */
            $engine = \core_search\manager::search_engine_instance();

            // Get engine status, with a 5 second timeout.
            $status = $engine->get_status(5);

            $time = number_format($status['time'], 2) . 's';
            $resultstr = get_string('check_time', 'search_solr', $time);
        } catch (\Throwable $t) {
            $status = [
                'connected' => false,
                'foundcore' => false,
                'error' => 'Exception when creating search manager: ' . $t->getMessage(),
                'exception' => $t,
            ];
        }

        // The 'error' field is optional in the status array, so never assume it is present.
        $errorhtml = \html_writer::tag('p', s($status['error'] ?? ''));

        if (!$status['connected']) {
            // No connection at all.
            $result = result::ERROR;
            $resultstr = get_string('check_notconnected', 'search_solr');
            $resultdetails .= $errorhtml;

        } else if (!$status['foundcore']) {
            // There's a connection, but the core doesn't seem to exist.
            $result = result::ERROR;
            $resultstr = get_string('check_nocore', 'search_solr');
            $resultdetails .= $errorhtml;

        } else {
            // Errors related to finding the core size only show if the size warning is configured.
            $sizelimit = get_config('search_solr', 'indexsizelimit');
            if (!array_key_exists('indexsize', $status)) {
                if ($sizelimit) {
                    $result = result::ERROR;
                    $resultstr = get_string('check_nosize', 'search_solr');
                    $resultdetails .= $errorhtml;
                }
            } else {
                // Show the index size in result, even if we aren't checking it.
                $sizestr = get_string(
                    'indexsize',
                    'search_solr',
                    display_size($status['indexsize']),
                );
                $resultdetails .= \html_writer::tag('p', $sizestr);
                if ($sizelimit) {
                    // Error at specified index size, warning at 90% of it.
                    $sizewarning = ($sizelimit * 9) / 10;
                    if ($status['indexsize'] > $sizelimit) {
                        $resultstr = get_string('check_indextoobig', 'search_solr');
                        $result = result::ERROR;
                    } else if ($status['indexsize'] > $sizewarning) {
                        // We don't say it's too big because it isn't yet, just show the size.
                        $resultstr = $sizestr;
                        $result = result::WARNING;
                    }
                }
            }
        }

        $ex = $status['exception'] ?? null;
        if ($ex) {
            // Strip the dirroot from the raw trace BEFORE escaping it; escaping first would
            // stop the replacement matching if the dirroot contains HTML special characters.
            $trace = str_replace($CFG->dirroot, '', $ex->getTraceAsString());
            $resultdetails .= \html_writer::tag('pre', s($trace));
        }

        return new result($result, $resultstr, $resultdetails);
    }
}
209 changes: 205 additions & 4 deletions search/engine/solr/classes/engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -1340,6 +1340,102 @@ public function is_installed() {
return function_exists('solr_get_version');
}

/** @var int Lifetime in seconds (600 = 10 minutes) of the cached bundle that combines all the .pem files when the capath option is used. */
const CA_PATH_CACHE_TIME = 600;

/** @var int Extra delay in seconds, counted from the moment a cached bundle expires, before its file is deleted. */
const CA_PATH_CACHE_DELETE_AFTER = 60;

/**
 * Gets status of Solr server.
 *
 * Sends a GET request to the 'admin/cores' path and inspects the JSON response.
 *
 * The result has the following fields:
 * - connected - true if we got a valid JSON response from server
 * - foundcore - true if we found the core defined in config (this could be false if schema not set up)
 *
 * It may have these other fields:
 * - time - time in seconds (float) taken by the request, set once the request was attempted
 * - error - text if anything went wrong
 * - exception - if an exception was thrown
 * - indexsize - index size in bytes if we found what it is
 *
 * This method does not throw; any exception is caught and returned in the result.
 *
 * @param int $timeout Optional timeout in seconds, otherwise uses config value
 * @return array Array with information about status
 * @since Moodle 5.0
 */
public function get_status($timeout = 0): array {
    $result = ['connected' => false, 'foundcore' => false];
    try {
        $options = [];
        if ($timeout) {
            $options['connect_timeout'] = $timeout;
            $options['read_timeout'] = $timeout;
            // Guzzle's 'read_timeout' only applies when reading a streamed body, so also set
            // the total request timeout; otherwise a server that accepts the connection but
            // never answers would not be bounded by the requested timeout.
            $options['timeout'] = $timeout;
        }
        $before = microtime(true);
        try {
            $response = $this->raw_get_request('admin/cores', $options);
        } finally {
            // Record the elapsed time even if the request threw.
            $result['time'] = microtime(true) - $before;
        }
        $status = $response->getStatusCode();
        if ($status !== 200) {
            $result['error'] = 'Unsuccessful status code: ' . $status;
            return $result;
        }
        $decoded = json_decode($response->getBody()->getContents());
        if (!$decoded) {
            $result['error'] = 'Invalid JSON';
            return $result;
        }
        // Provided we get some valid JSON then probably Solr exists and is responding.
        // Any following errors we don't count as not connected (ERROR display in the check)
        // because maybe it happens if Solr changes their JSON format in a future version.
        $result['connected'] = true;
        // The is_object guards below stop property_exists() throwing a TypeError when the
        // JSON has an unexpected shape (e.g. an array where an object was expected).
        if (!is_object($decoded) || !property_exists($decoded, 'status')) {
            $result['error'] = 'Unexpected JSON: no core status';
            return $result;
        }
        foreach ($decoded->status as $core) {
            if (!is_object($core) || !property_exists($core, 'name')) {
                $result['error'] = 'Unexpected JSON: core has no name';
                return $result;
            }
            $match = $core->name === $this->config->indexname;
            if (!$match && property_exists($core, 'cloud')) {
                // Only consulted when the core name itself did not match: compare the
                // collection name from the core's 'cloud' object instead.
                if (!is_object($core->cloud) || !property_exists($core->cloud, 'collection')) {
                    $result['error'] = 'Unexpected JSON: core cloud has no name';
                    return $result;
                }
                $match = $core->cloud->collection === $this->config->indexname;
            }

            if (!$match) {
                continue;
            }

            $result['foundcore'] = true;
            if (!property_exists($core, 'index')) {
                $result['error'] = 'Unexpected JSON: core has no index';
                return $result;
            }
            if (!is_object($core->index) || !property_exists($core->index, 'sizeInBytes')) {
                $result['error'] = 'Unexpected JSON: core index has no sizeInBytes';
                return $result;
            }
            $result['indexsize'] = $core->index->sizeInBytes;
            return $result;
        }
        $result['error'] = 'Could not find core matching ' . $this->config->indexname;
        return $result;
    } catch (\Throwable $t) {
        $result['error'] = 'Exception occurred: ' . $t->getMessage();
        $result['exception'] = $t;
        return $result;
    }
}

/**
* Returns the solr client instance.
*
Expand Down Expand Up @@ -1453,23 +1549,128 @@ public function get_curl_object() {
}

/**
* Return a Moodle url object for the server connection.
* Return a Moodle url object for the raw server URL (containing all indexes).
*
* @param string $path The solr path to append.
* @return \moodle_url
*/
public function get_connection_url($path) {
public function get_server_url(string $path): \moodle_url {
// Must use the proper protocol, or SSL will fail.
$protocol = !empty($this->config->secure) ? 'https' : 'http';
$url = $protocol . '://' . rtrim($this->config->server_hostname, '/');
if (!empty($this->config->server_port)) {
$url .= ':' . $this->config->server_port;
}
$url .= '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

$url .= '/solr/' . ltrim($path, '/');
return new \moodle_url($url);
}

/**
 * Builds the Moodle url for a path inside the configured search index.
 *
 * The configured index name is prepended to the given path (with any leading slashes
 * removed) and the result is resolved through get_server_url().
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    $indexpath = $this->config->indexname . '/' . ltrim($path, '/');
    return $this->get_server_url($indexpath);
}

/**
 * Sends a GET request to the Solr server (for things the Solr extension doesn't support).
 *
 * The request goes through the \core\http_client instance obtained from the DI container
 * (Guzzle, which can be mocked) rather than through get_curl_object.
 *
 * @param string $path URL path (after /solr/) e.g. 'admin/cores?action=STATUS&core=frog'
 * @param array $overrideoptions Optional array of Guzzle options, will override config
 * @return \Psr\Http\Message\ResponseInterface Response message from Guzzle
 * @throws \GuzzleHttp\Exception\GuzzleException If any problem connecting
 * @since Moodle 5.0
 */
public function raw_get_request(
    string $path,
    array $overrideoptions = [],
): \Psr\Http\Message\ResponseInterface {
    $httpclient = \core\di::get(\core\http_client::class);
    $requesturl = $this->get_server_url($path)->out(false);
    $requestoptions = $this->get_http_client_options($overrideoptions);

    return $httpclient->get($requesturl, $requestoptions);
}

/**
 * Gets the \core\http_client options for a connection.
 *
 * Options are built from the engine config (timeouts, basic auth, client certificate and
 * key, CA verification) and then the override options are applied on top, replacing any
 * option with the same name.
 *
 * @param array $overrideoptions Optional array to override some of the options
 * @return array Array of http_client options
 */
protected function get_http_client_options(array $overrideoptions = []): array {
    $timeout = !empty($this->config->server_timeout) ? (int)$this->config->server_timeout : 30;
    $options = [
        'connect_timeout' => $timeout,
        'read_timeout' => $timeout,
    ];
    if (!empty($this->config->server_username)) {
        $options['auth'] = [$this->config->server_username, $this->config->server_password];
    }
    if (!empty($this->config->ssl_cert)) {
        $options['cert'] = $this->config->ssl_cert;
    }
    if (!empty($this->config->ssl_key)) {
        if (!empty($this->config->ssl_keypassword)) {
            $options['ssl_key'] = [$this->config->ssl_key, $this->config->ssl_keypassword];
        } else {
            $options['ssl_key'] = $this->config->ssl_key;
        }
    }
    if (!empty($this->config->ssl_cainfo)) {
        $options['verify'] = $this->config->ssl_cainfo;
    } else if (!empty($this->config->ssl_capath)) {
        // Guzzle doesn't support a whole path of CA certs, so we have to make a single file
        // with all the *.pem files in that directory.
        $options['verify'] = $this->get_capath_bundle($this->config->ssl_capath);
    }

    // Apply other/overridden options.
    return array_replace($options, $overrideoptions);
}

/**
 * Returns the path of a single file containing every *.pem file found in the CA path.
 *
 * The bundle is kept in the local cache directory and reused for CA_PATH_CACHE_TIME seconds.
 * While looking for a usable bundle, files that expired more than CA_PATH_CACHE_DELETE_AFTER
 * seconds ago are deleted.
 *
 * @param string $capath Directory containing the CA certificates
 * @return string Full path of the bundle file
 */
private function get_capath_bundle(string $capath): string {
    $cachefolder = make_localcache_directory('search_solr');
    $prefix = 'capath.' . sha1($capath);
    $now = \core\di::get(\core\clock::class)->time();

    // You are not allowed to overwrite files in localcache folders so we use files
    // with the time in, and delete old files with a 1 minute delay to avoid race
    // conditions. scandir returns false on failure, so fall back to an empty list.
    foreach (scandir($cachefolder) ?: [] as $filename) {
        if (!preg_match('~^(.*)\.([0-9]+)$~', $filename, $matches)) {
            continue;
        }
        [1 => $fileprefix, 2 => $time] = $matches;
        $pathname = $cachefolder . '/' . $filename;
        if ($time > $now - self::CA_PATH_CACHE_TIME && $fileprefix === $prefix) {
            // Still-valid bundle for this CA path.
            return $pathname;
        }
        if ($time <= $now - self::CA_PATH_CACHE_TIME - self::CA_PATH_CACHE_DELETE_AFTER) {
            unlink($pathname);
        }
    }

    // If we don't have it yet, we need to make the cached file.
    $allpems = '';
    foreach (scandir($capath) ?: [] as $filename) {
        if (!preg_match('~\.pem$~', $filename)) {
            continue;
        }
        $contents = file_get_contents($capath . '/' . $filename);
        if ($contents !== false) {
            // Unreadable files are skipped rather than contributing blank entries.
            $allpems .= $contents . "\n\n";
        }
    }
    $pathname = $cachefolder . '/' . $prefix . '.' . $now;
    file_put_contents($pathname, $allpems);
    return $pathname;
}

/**
* Solr includes group support in the execute_query function.
*
Expand Down
8 changes: 8 additions & 0 deletions search/engine/solr/lang/en/search_solr.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

$string['check_indextoobig'] = 'Index larger than specified size';
$string['check_nocore'] = 'Cannot find index on Solr server';
$string['check_nosize'] = 'Unable to determine index size on Solr server';
$string['check_notconnected'] = 'Cannot connect to Solr server';
$string['check_time'] = 'Server responded with status in {$a}';
$string['connectionerror'] = 'The specified Solr server is not available or the specified index does not exist';
$string['connectionsettings'] = 'Connection settings';
$string['errorcreatingschema'] = 'Error creating the Solr schema: {$a}';
Expand All @@ -32,6 +37,9 @@
$string['fileindexing_help'] = 'If your Solr install supports it, this feature allows Moodle to send files to be indexed.<br/>
You will need to reindex all site contents after enabling this option for all files to be added.';
$string['fileindexsettings'] = 'File indexing settings';
$string['indexsize'] = 'The index is using {$a} on the Solr server.';
$string['indexsizelimit'] = 'Index size limit';
$string['indexsizelimit_desc'] = 'Shows an error on the status report page if the search index grows larger than this size (in bytes), and a warning if it exceeds 90%. 0 means no monitoring.';
$string['maxindexfilekb'] = 'Maximum file size to index (kB)';
$string['maxindexfilekb_help'] = 'Files larger than this number of kilobytes will not be included in search indexing. If set to zero, files of any size will be indexed.';
$string['minimumsolr4'] = 'Solr 4.0 is the minimum version required for Moodle';
Expand Down
Loading

0 comments on commit b6a1558

Please sign in to comment.