From ab68ea48fd460fdc4491b8a3372ed00ba7e97c2f Mon Sep 17 00:00:00 2001 From: vttran Date: Mon, 14 Oct 2024 11:35:07 +0700 Subject: [PATCH] JAMES-4077 [SearchSnippet] Highlight OpenSearch implementation (#2447) --- .../opensearch/search/ScrolledSearch.java | 2 +- .../SearchHighLighterContract.java | 64 +++-- .../search/LuceneSearchHighlighter.java | 3 + ...OpenSearchListeningMessageSearchIndex.java | 5 +- .../opensearch/json/JsonMessageConstants.java | 1 - .../search/OpenSearchSearchHighlighter.java | 91 +++++++ .../opensearch/search/OpenSearchSearcher.java | 42 ++- .../OpenSearchSearchHighlighterTest.java | 244 ++++++++++++++++++ 8 files changed, 423 insertions(+), 29 deletions(-) create mode 100644 mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java create mode 100644 mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java diff --git a/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java b/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java index 3a5d22d1ce0..8f9ccf209dc 100644 --- a/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java +++ b/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java @@ -72,7 +72,7 @@ private void next(FluxSink> sink, AtomicReference> onResponse = searchResponse -> { - scrollId.set(Optional.of(searchResponse.scrollId())); + scrollId.set(Optional.ofNullable(searchResponse.scrollId())); sink.next(searchResponse); if (searchResponse.hits().hits().isEmpty()) { diff --git a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java index e95f5f8d06a..e273e95e9ca 100644 --- a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java +++ b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java @@ -33,6 +33,7 @@ import org.apache.james.mailbox.exception.MailboxException; import org.apache.james.mailbox.model.ComposedMessageId; import org.apache.james.mailbox.model.MailboxId; +import org.apache.james.mailbox.model.MessageId; import org.apache.james.mailbox.model.MultimailboxesSearchQuery; import org.apache.james.mailbox.model.SearchQuery; import org.apache.james.mime4j.dom.Message; @@ -64,14 +65,14 @@ default void highlightSearchShouldReturnHighLightedSubjectWhenMatched() throws E ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Matthieu for your help") + .setSubject("Hallo, Thx Matthieu for your help") .setBody("append contentA to inbox", StandardCharsets.UTF_8)), session).getId(); ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Alex for your help") + .setSubject("Hallo, Thx Alex for your help") .setBody("append contentB to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -90,7 +91,7 @@ default void highlightSearchShouldReturnHighLightedSubjectWhenMatched() throws E assertThat(searchSnippets).hasSize(1); assertSoftly(softly -> { softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId()); - softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx Matthieu for your help"); + softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx Matthieu for your help"); }); } @@ -102,14 +103,14 @@ default void highlightSearchShouldReturnHighlightedBodyWhenMatched() throws Exce ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Matthieu for your help") + .setSubject("Hallo, Thx Matthieu for your help") .setBody("append contentA to inbox", StandardCharsets.UTF_8)), session).getId(); ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Alex for your help") + .setSubject("Hallo, Thx Alex for your help") .setBody("append contentB to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -139,14 +140,14 @@ default void searchBothSubjectAndBodyHighLightShouldReturnEmptyWhenNotMatched() ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Matthieu for your help") + .setSubject("Hallo, Thx Matthieu for your help") .setBody("append contentA to inbox", StandardCharsets.UTF_8)), session).getId(); ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Alex for your help") + .setSubject("Hallo, Thx Alex for your help") .setBody("append contentB to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -172,14 +173,14 @@ default void searchBothSubjectAndBodyHighLightShouldReturnEntryWhenMatched() thr ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Naruto for your help") + .setSubject("Hallo, Thx Naruto for your help") .setBody("append Naruto to inbox", StandardCharsets.UTF_8)), session).getId(); ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Alex for your help") + .setSubject("Hallo, Thx Alex for your help") .setBody("append contentB to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -200,7 +201,7 @@ default void searchBothSubjectAndBodyHighLightShouldReturnEntryWhenMatched() thr assertSoftly(softly -> { softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId()); softly.assertThat(searchSnippets.getFirst().highlightedBody()).contains("append Naruto to inbox"); - softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx Naruto for your help"); + softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx Naruto for your help"); }); } @@ -245,7 +246,7 @@ default void highlightSearchShouldReturnCorrectFormatWhenSearchTwoWords() throws ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Naruto Itachi for your help") + .setSubject("Hallo, Thx Naruto Itachi for your help") .setBody("append Naruto Itachi to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -319,7 +320,7 @@ default void highlightSearchShouldNotReturnEntryWhenDoesNotAccessible() throws E ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Matthieu for your help") + .setSubject("Hallo, Thx Matthieu for your help") .setBody("append contentA to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -344,7 +345,7 @@ default void highlightSearchShouldReturnEntryWhenHasAccessible() throws Exceptio ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Matthieu for your help") + .setSubject("Hallo, Thx Matthieu for your help") .setBody("append contentA to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -364,7 +365,7 @@ default void highlightSearchShouldReturnEntryWhenHasAccessible() throws Exceptio // Then highlightSearch should return username1 entry assertThat(searchSnippets).hasSize(1); - assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx Matthieu for your help"); + assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx Matthieu for your help"); } @Test @@ -374,14 +375,14 @@ default void highLightSearchShouldSupportConjunctionCriterionInMultiMessage() th ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Naruto for your help") + .setSubject("Hallo, Thx Naruto for your help") .setBody("append Naruto to inbox", StandardCharsets.UTF_8)), session).getId(); ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Alex for your help") + .setSubject("Hallo, Thx Alex for your help") .setBody("append contentB to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -402,8 +403,8 @@ default void highLightSearchShouldSupportConjunctionCriterionInMultiMessage() th assertThat(searchSnippets.stream() .map(SearchSnippet::highlightedSubject) .toList()) - .containsExactlyInAnyOrder(Optional.of("Hallo! Thx Naruto for your help"), - Optional.of("Hallo! Thx Alex for your help")); + .containsExactlyInAnyOrder(Optional.of("Hallo, Thx Naruto for your help"), + Optional.of("Hallo, Thx Alex for your help")); } @Test @@ -413,14 +414,14 @@ default void highLightSearchShouldSupportConjunctionCriterionInSingleMessage() t ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Naruto for your help - Sasuke for your help") + .setSubject("Hallo, Thx Naruto for your help - Sasuke for your help") .setBody("append Naruto to inbox", StandardCharsets.UTF_8)), session).getId(); ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from( Message.Builder.of() .setTo("to@james.local") - .setSubject("Hallo! Thx Alex for your help") + .setSubject("Hallo, Thx Alex for your help") .setBody("append contentB to inbox", StandardCharsets.UTF_8)), session).getId(); @@ -441,6 +442,27 @@ default void highLightSearchShouldSupportConjunctionCriterionInSingleMessage() t assertThat(searchSnippets.stream() .map(SearchSnippet::highlightedSubject) .toList()) - .containsExactlyInAnyOrder(Optional.of("Hallo! Thx Naruto for your help - Sasuke for your help")); + .containsExactlyInAnyOrder(Optional.of("Hallo, Thx Naruto for your help - Sasuke for your help")); + } + + @Test + default void highLightSearchShouldReturnEmptyWhenMessageIdsIsEmpty() throws Exception { + MailboxSession session = session(USERNAME1); + ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo, Thx Naruto Itachi for your help") + .setBody("append Naruto Itachi to inbox", StandardCharsets.UTF_8)), + session).getId(); + + verifyMessageWasIndexed(1); + + List messageIdsSearch = List.of(); + + assertThat(Flux.from(testee().highlightSearch(messageIdsSearch, MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.bodyContains("Naruto Itachi"))) + .inMailboxes(List.of(m1.getMailboxId())) + .build(), session)) + .collectList() + .block()).hasSize(0); } } diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java index 6a4facb1dec..ee1fdf6edc9 100644 --- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java +++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java @@ -98,6 +98,9 @@ public LuceneSearchHighlighter(LuceneMessageSearchIndex luceneMessageSearchIndex @Override public Flux highlightSearch(List messageIds, MultimailboxesSearchQuery expression, MailboxSession session) { + if (messageIds.isEmpty()) { + return Flux.empty(); + } return storeMailboxManager.getInMailboxIds(expression, session) .collectList() .flatMapMany(inMailboxIdsAccessible -> highlightSearch(inMailboxIdsAccessible, expression.getSearchQuery(), messageIds)); diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java index 547245de05e..9de1fedf31e 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java @@ -28,6 +28,7 @@ import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MAILBOX_ID; import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MESSAGE_ID; import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.UID; +import static org.apache.james.mailbox.opensearch.search.OpenSearchSearcher.SEARCH_HIGHLIGHT; import java.util.Collection; import java.util.EnumSet; @@ -324,7 +325,7 @@ protected Flux doSearch(MailboxSession session, Mailbox mailbox, Sea Preconditions.checkArgument(session != null, "'session' is mandatory"); Optional noLimit = Optional.empty(); - return searcher.search(ImmutableList.of(mailbox.getMailboxId()), searchQuery, noLimit, UID_FIELD) + return searcher.search(ImmutableList.of(mailbox.getMailboxId()), searchQuery, noLimit, UID_FIELD, !SEARCH_HIGHLIGHT) .handle(this::extractUidFromHit); } @@ -336,7 +337,7 @@ public Flux search(MailboxSession session, Collection mail return Flux.empty(); } - return searcher.search(mailboxIds, searchQuery, Optional.empty(), MESSAGE_ID_FIELD) + return searcher.search(mailboxIds, searchQuery, Optional.empty(), MESSAGE_ID_FIELD, !SEARCH_HIGHLIGHT) .handle(this::extractMessageIdFromHit) .distinct() .take(limit); diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java index 65cef552610..bb99f9ff855 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java @@ -45,7 +45,6 @@ public interface JsonMessageConstants { String SENT_DATE = "sentDate"; String SAVE_DATE = "saveDate"; String ATTACHMENTS = "attachments"; - String TEXT = "text"; String MIME_MESSAGE_ID = "mimeMessageID"; String USER = "user"; diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java new file mode 100644 index 00000000000..9f676b3a7e2 --- /dev/null +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java @@ -0,0 +1,91 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.mailbox.opensearch.search; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import org.apache.james.mailbox.MailboxSession; +import org.apache.james.mailbox.model.MailboxId; +import org.apache.james.mailbox.model.MessageId; +import org.apache.james.mailbox.model.MultimailboxesSearchQuery; +import org.apache.james.mailbox.model.SearchQuery; +import org.apache.james.mailbox.opensearch.json.JsonMessageConstants; +import org.apache.james.mailbox.searchhighligt.SearchHighlighter; +import org.apache.james.mailbox.searchhighligt.SearchSnippet; +import org.apache.james.mailbox.store.StoreMailboxManager; +import org.opensearch.client.opensearch.core.search.Hit; + +import com.fasterxml.jackson.databind.node.ObjectNode; + +import reactor.core.publisher.Flux; + +public class OpenSearchSearchHighlighter implements SearchHighlighter { + public static final String ATTACHMENT_TEXT_CONTENT_FIELD = JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT; + public static final List SNIPPET_FIELDS = List.of( + JsonMessageConstants.MESSAGE_ID, + JsonMessageConstants.SUBJECT, + JsonMessageConstants.TEXT_BODY, + ATTACHMENT_TEXT_CONTENT_FIELD); + + private final OpenSearchSearcher openSearchSearcher; + private final StoreMailboxManager storeMailboxManager; + private final MessageId.Factory messageIdFactory; + + public OpenSearchSearchHighlighter(OpenSearchSearcher openSearchSearcher, StoreMailboxManager storeMailboxManager, MessageId.Factory messageIdFactory) { + this.openSearchSearcher = openSearchSearcher; + this.storeMailboxManager = storeMailboxManager; + this.messageIdFactory = messageIdFactory; + } + + @Override + public Flux highlightSearch(List messageIds, MultimailboxesSearchQuery expression, MailboxSession session) { + if (messageIds.isEmpty()) { + return Flux.empty(); + } + + return storeMailboxManager.getInMailboxIds(expression, session) + .collectList() + .flatMapMany(mailboxIds -> highlightSearch(mailboxIds, expression.getSearchQuery(), messageIds.size())); + } + + private Flux highlightSearch(List mailboxIds, SearchQuery query, int limit) { + return openSearchSearcher.search(mailboxIds, query, Optional.of(limit), SNIPPET_FIELDS, OpenSearchSearcher.SEARCH_HIGHLIGHT) + .map(this::buildSearchSnippet); + } + + private SearchSnippet buildSearchSnippet(Hit searchResult) { + MessageId messageId = Optional.ofNullable(searchResult.fields().get(JsonMessageConstants.MESSAGE_ID)) + .map(jsonData -> jsonData.toJson().asJsonArray().getString(0)) + .map(messageIdFactory::fromString) + .orElseThrow(() -> new IllegalStateException("Can not extract MessageID for search result: " + searchResult.id())); + + Map> highlightHit = searchResult.highlight(); + + Optional highlightedSubject = Optional.ofNullable(highlightHit.get(JsonMessageConstants.SUBJECT)) + .map(List::getFirst); + Optional highlightedTextBody = Optional.ofNullable(highlightHit.get(JsonMessageConstants.TEXT_BODY)) + .or(() -> Optional.ofNullable(highlightHit.get(ATTACHMENT_TEXT_CONTENT_FIELD))) + .map(List::getFirst); + + return new SearchSnippet(messageId, highlightedSubject, highlightedTextBody); + } +} diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java index 81b16e3ab37..09f1524de40 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java @@ -19,6 +19,8 @@ package org.apache.james.mailbox.opensearch.search; +import static org.apache.james.mailbox.opensearch.search.OpenSearchSearchHighlighter.ATTACHMENT_TEXT_CONTENT_FIELD; + import java.util.Collection; import java.util.List; import java.util.Optional; @@ -31,11 +33,15 @@ import org.apache.james.backends.opensearch.search.ScrolledSearch; import org.apache.james.mailbox.model.MailboxId; import org.apache.james.mailbox.model.SearchQuery; +import org.apache.james.mailbox.opensearch.json.JsonMessageConstants; import org.apache.james.mailbox.opensearch.query.QueryConverter; import org.apache.james.mailbox.opensearch.query.SortConverter; +import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration; import org.opensearch.client.opensearch._types.SortOptions; import org.opensearch.client.opensearch._types.Time; import org.opensearch.client.opensearch.core.SearchRequest; +import org.opensearch.client.opensearch.core.search.Highlight; +import org.opensearch.client.opensearch.core.search.HighlightField; import org.opensearch.client.opensearch.core.search.Hit; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -44,6 +50,7 @@ public class OpenSearchSearcher { public static final int DEFAULT_SEARCH_SIZE = 100; + public static final boolean SEARCH_HIGHLIGHT = true; private static final Time TIMEOUT = new Time.Builder().time("1m").build(); private static final int MAX_ROUTING_KEY = 5; @@ -52,24 +59,47 @@ public class OpenSearchSearcher { private final int size; private final AliasName aliasName; private final RoutingKey.Factory routingKeyFactory; + private final Highlight highlightQuery; public OpenSearchSearcher(ReactorOpenSearchClient client, QueryConverter queryConverter, int size, ReadAliasName aliasName, RoutingKey.Factory routingKeyFactory) { + this(client, queryConverter, size, aliasName, routingKeyFactory, SearchHighlighterConfiguration.DEFAULT); + } + + public OpenSearchSearcher(ReactorOpenSearchClient client, QueryConverter queryConverter, int size, + ReadAliasName aliasName, RoutingKey.Factory routingKeyFactory, + SearchHighlighterConfiguration searchHighlighterConfiguration) { this.client = client; this.queryConverter = queryConverter; this.size = size; this.aliasName = aliasName; this.routingKeyFactory = routingKeyFactory; + + HighlightField highlightField = new HighlightField.Builder() + .forceSource(true) + .preTags(searchHighlighterConfiguration.preTagFormatter()) + .postTags(searchHighlighterConfiguration.postTagFormatter()) + .fragmentSize(searchHighlighterConfiguration.fragmentSize()) + .numberOfFragments(1) + .build(); + + this.highlightQuery = new Highlight.Builder() + .fields(JsonMessageConstants.SUBJECT, highlightField) + .fields(JsonMessageConstants.TEXT_BODY, highlightField) + .fields(ATTACHMENT_TEXT_CONTENT_FIELD, highlightField) + .build(); } public Flux> search(Collection mailboxIds, SearchQuery query, - Optional limit, List fields) { - SearchRequest searchRequest = prepareSearch(mailboxIds, query, limit, fields); + Optional limit, List fields, + boolean searchHighlight) { + SearchRequest searchRequest = prepareSearch(mailboxIds, query, limit, fields, searchHighlight); return new ScrolledSearch(client, searchRequest) .searchHits(); } - private SearchRequest prepareSearch(Collection mailboxIds, SearchQuery query, Optional limit, List fields) { + private SearchRequest prepareSearch(Collection mailboxIds, SearchQuery query, + Optional limit, List fields, boolean highlight) { List sorts = query.getSorts() .stream() .flatMap(SortConverter::convertSort) @@ -84,6 +114,10 @@ private SearchRequest prepareSearch(Collection mailboxIds, SearchQuer .storedFields(fields) .sort(sorts); + if (highlight) { + request.highlight(highlightQuery); + } + return toRoutingKey(mailboxIds) .map(request::routing) .orElse(request) @@ -101,7 +135,7 @@ private Optional toRoutingKey(Collection mailboxIds) { } private int computeRequiredSize(Optional limit) { - return limit.map(value -> Math.min(value.intValue(), size)) + return limit.map(value -> Math.min(value, size)) .orElse(size); } diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java new file mode 100644 index 00000000000..a0d73e19ba5 --- /dev/null +++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java @@ -0,0 +1,244 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.mailbox.opensearch.search; + +import static org.apache.james.mailbox.opensearch.search.OpenSearchSearcherTest.SEARCH_SIZE; +import static org.assertj.core.api.Assertions.assertThat; +import static org.awaitility.Durations.ONE_HUNDRED_MILLISECONDS; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.time.ZoneId; +import java.util.List; +import java.util.UUID; + +import org.apache.james.backends.opensearch.DockerOpenSearchExtension; +import org.apache.james.backends.opensearch.IndexName; +import org.apache.james.backends.opensearch.OpenSearchIndexer; +import org.apache.james.backends.opensearch.ReactorOpenSearchClient; +import org.apache.james.backends.opensearch.ReadAliasName; +import org.apache.james.backends.opensearch.WriteAliasName; +import org.apache.james.core.Username; +import org.apache.james.mailbox.MailboxManager; +import org.apache.james.mailbox.MailboxSession; +import org.apache.james.mailbox.MessageManager; +import org.apache.james.mailbox.inmemory.InMemoryMessageId; +import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources; +import org.apache.james.mailbox.model.ComposedMessageId; +import org.apache.james.mailbox.model.Mailbox; +import org.apache.james.mailbox.model.MailboxACL; +import org.apache.james.mailbox.model.MailboxId; +import org.apache.james.mailbox.model.MailboxPath; +import org.apache.james.mailbox.model.MessageId; +import org.apache.james.mailbox.model.MultimailboxesSearchQuery; +import org.apache.james.mailbox.model.SearchQuery; +import org.apache.james.mailbox.opensearch.IndexAttachments; +import org.apache.james.mailbox.opensearch.IndexHeaders; +import org.apache.james.mailbox.opensearch.MailboxIdRoutingKeyFactory; +import org.apache.james.mailbox.opensearch.MailboxIndexCreationUtil; +import org.apache.james.mailbox.opensearch.OpenSearchMailboxConfiguration; +import org.apache.james.mailbox.opensearch.events.OpenSearchListeningMessageSearchIndex; +import org.apache.james.mailbox.opensearch.json.MessageToOpenSearchJson; +import org.apache.james.mailbox.opensearch.query.CriterionConverter; +import org.apache.james.mailbox.opensearch.query.QueryConverter; +import org.apache.james.mailbox.searchhighligt.SearchHighLighterContract; +import org.apache.james.mailbox.searchhighligt.SearchHighlighter; +import org.apache.james.mailbox.searchhighligt.SearchSnippet; +import org.apache.james.mailbox.store.StoreMailboxManager; +import org.apache.james.mailbox.store.StoreMessageManager; +import org.apache.james.mailbox.store.search.MessageSearchIndex; +import org.apache.james.mailbox.tika.TikaConfiguration; +import org.apache.james.mailbox.tika.TikaExtension; +import org.apache.james.mailbox.tika.TikaHttpClientImpl; +import org.apache.james.mailbox.tika.TikaTextExtractor; +import org.apache.james.metrics.tests.RecordingMetricFactory; +import org.apache.james.mime4j.dom.Message; +import org.apache.james.util.ClassLoaderUtils; +import org.awaitility.Awaitility; +import org.awaitility.Durations; +import org.awaitility.core.ConditionFactory; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import com.github.fge.lambdas.Throwing; +import com.google.common.collect.ImmutableSet; + +import reactor.core.publisher.Flux; + +public class OpenSearchSearchHighlighterTest implements SearchHighLighterContract { + private MessageSearchIndex messageSearchIndex; + private StoreMailboxManager storeMailboxManager; + private StoreMessageManager inboxMessageManager; + private OpenSearchSearchHighlighter testee; + + private static final ConditionFactory CALMLY_AWAIT = Awaitility + .with().pollInterval(ONE_HUNDRED_MILLISECONDS) + .and().pollDelay(ONE_HUNDRED_MILLISECONDS) + .await(); + + @RegisterExtension + static TikaExtension tika = new TikaExtension(); + + @RegisterExtension + static DockerOpenSearchExtension openSearch = new DockerOpenSearchExtension(DockerOpenSearchExtension.CleanupStrategy.NONE); + static ReactorOpenSearchClient client; + static TikaTextExtractor textExtractor; + + @BeforeAll + static void setUpAll() throws Exception { + client = openSearch.getDockerOpenSearch().clientProvider().get(); + textExtractor = new TikaTextExtractor(new RecordingMetricFactory(), + new TikaHttpClientImpl(TikaConfiguration.builder() + .host(tika.getIp()) + .port(tika.getPort()) + .timeoutInMillis(tika.getTimeoutInMillis()) + .build())); + } + + @AfterAll + static void tearDown() throws IOException { + client.close(); + } + + @BeforeEach + public void setUp() throws Exception { + WriteAliasName writeAliasName = new WriteAliasName(UUID.randomUUID().toString()); + ReadAliasName readAliasName = new ReadAliasName(UUID.randomUUID().toString()); + IndexName indexName = new IndexName(UUID.randomUUID().toString()); + MailboxIndexCreationUtil.prepareClient( + client, readAliasName, writeAliasName, indexName, + openSearch.getDockerOpenSearch().configuration()); + + MailboxIdRoutingKeyFactory routingKeyFactory = new MailboxIdRoutingKeyFactory(); + OpenSearchMailboxConfiguration openSearchMailboxConfiguration = OpenSearchMailboxConfiguration.builder() + .optimiseMoves(false) + .textFuzzinessSearch(false) + .build(); + final MessageId.Factory messageIdFactory = new InMemoryMessageId.Factory(); + + OpenSearchSearcher openSearchSearcher = new OpenSearchSearcher(client, new QueryConverter(new CriterionConverter(openSearchMailboxConfiguration)), SEARCH_SIZE, + readAliasName, routingKeyFactory); + + InMemoryIntegrationResources resources = InMemoryIntegrationResources.builder() + .preProvisionnedFakeAuthenticator() + .fakeAuthorizator() + .inVmEventBus() + .defaultAnnotationLimits() + .defaultMessageParser() + .listeningSearchIndex(preInstanciationStage -> new OpenSearchListeningMessageSearchIndex( + preInstanciationStage.getMapperFactory(), + ImmutableSet.of(), + new OpenSearchIndexer(client, + writeAliasName), + openSearchSearcher, + new MessageToOpenSearchJson(textExtractor, ZoneId.of("Europe/Paris"), IndexAttachments.YES, IndexHeaders.YES), + preInstanciationStage.getSessionProvider(), routingKeyFactory, messageIdFactory, + openSearchMailboxConfiguration, new RecordingMetricFactory())) + .noPreDeletionHooks() + .storeQuotaManager() + .build(); + + storeMailboxManager = resources.getMailboxManager(); + messageSearchIndex = resources.getSearchIndex(); + MailboxSession session = storeMailboxManager.createSystemSession(USERNAME1); + MailboxPath inboxPath = MailboxPath.inbox(USERNAME1); + storeMailboxManager.createMailbox(inboxPath, session); + inboxMessageManager = (StoreMessageManager) storeMailboxManager.getMailbox(inboxPath, session); + + testee = new OpenSearchSearchHighlighter(openSearchSearcher, storeMailboxManager, messageIdFactory); + } + + @Override + public SearchHighlighter testee() { + return testee; + } + + @Override + public MailboxSession session(Username username) { + return storeMailboxManager.createSystemSession(username); + } + + @Override + public MessageManager.AppendResult appendMessage(MessageManager.AppendCommand appendCommand, MailboxSession session) { + return Throwing.supplier(() -> inboxMessageManager.appendMessage(appendCommand, session)).get(); + } + + @Override + public MailboxId randomMailboxId(Username username) { + String random = new String(new byte[8]); + return Throwing.supplier(() -> storeMailboxManager.createMailbox(MailboxPath.forUser(USERNAME1, random), session(username)).get()).get(); + } + + @Override + public void applyRightsCommand(MailboxId mailboxId, Username owner, Username delegated) { + Mailbox mailbox = inboxMessageManager.getMailboxEntity(); + Throwing.runnable(() -> storeMailboxManager.applyRightsCommand(mailbox.generateAssociatedPath(), + MailboxACL.command().forUser(delegated).rights(MailboxACL.FULL_RIGHTS).asAddition(), + session(owner))).run(); + } + + @Override + public void verifyMessageWasIndexed(int indexedMessageCount) { + CALMLY_AWAIT.atMost(Durations.TEN_SECONDS) + .untilAsserted(() -> assertThat(messageSearchIndex.search(session(USERNAME1), inboxMessageManager.getMailboxEntity(), SearchQuery.of()).toStream().count()) + .isEqualTo(indexedMessageCount)); + } + + @Test + void shouldHighlightAttachmentTextContentWhenTextBodyDoesNotMatch() throws Exception { + assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment)); + MailboxSession session = session(USERNAME1); + + ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo, Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // m2 has an attachment with text content: "This is a beautiful banana" + ComposedMessageId m2 = inboxMessageManager.appendMessage( + MessageManager.AppendCommand.builder() + .build(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/emailWithTextAttachment.eml")), + session).getId(); + + verifyMessageWasIndexed(2); + + String keywordSearch = "beautiful"; + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + new SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR, + List.of(SearchQuery.bodyContains(keywordSearch), + SearchQuery.attachmentContains(keywordSearch))))) + .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId())) + .build(); + + List searchSnippets = Flux.from(testee().highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session)) + .collectList() + .block(); + + assertThat(searchSnippets).hasSize(1); + assertThat(searchSnippets.getFirst().highlightedBody()).contains("This is a beautiful banana."); + } + +}