Skip to content

Commit

Permalink
JAMES-4077 [SearchSnippet] Highlight OpenSearch implementation (#2447)
Browse files Browse the repository at this point in the history
  • Loading branch information
vttranlina authored Oct 14, 2024
1 parent 16cec18 commit ab68ea4
Show file tree
Hide file tree
Showing 8 changed files with 423 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ private void next(FluxSink<ScrollResponse<ObjectNode>> sink, AtomicReference<Opt
}

Consumer<ScrollResponse<ObjectNode>> onResponse = searchResponse -> {
scrollId.set(Optional.of(searchResponse.scrollId()));
scrollId.set(Optional.ofNullable(searchResponse.scrollId()));
sink.next(searchResponse);

if (searchResponse.hits().hits().isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.james.mailbox.exception.MailboxException;
import org.apache.james.mailbox.model.ComposedMessageId;
import org.apache.james.mailbox.model.MailboxId;
import org.apache.james.mailbox.model.MessageId;
import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
import org.apache.james.mailbox.model.SearchQuery;
import org.apache.james.mime4j.dom.Message;
Expand Down Expand Up @@ -64,14 +65,14 @@ default void highlightSearchShouldReturnHighLightedSubjectWhenMatched() throws E
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Matthieu for your help")
.setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox", StandardCharsets.UTF_8)),
session).getId();

ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Alex for your help")
.setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -90,7 +91,7 @@ default void highlightSearchShouldReturnHighLightedSubjectWhenMatched() throws E
assertThat(searchSnippets).hasSize(1);
assertSoftly(softly -> {
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx <mark>Matthieu</mark> for your help");
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx <mark>Matthieu</mark> for your help");
});
}

Expand All @@ -102,14 +103,14 @@ default void highlightSearchShouldReturnHighlightedBodyWhenMatched() throws Exce
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Matthieu for your help")
.setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox", StandardCharsets.UTF_8)),
session).getId();

ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Alex for your help")
.setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand Down Expand Up @@ -139,14 +140,14 @@ default void searchBothSubjectAndBodyHighLightShouldReturnEmptyWhenNotMatched()
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Matthieu for your help")
.setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox", StandardCharsets.UTF_8)),
session).getId();

ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Alex for your help")
.setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -172,14 +173,14 @@ default void searchBothSubjectAndBodyHighLightShouldReturnEntryWhenMatched() thr
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Naruto for your help")
.setSubject("Hallo, Thx Naruto for your help")
.setBody("append Naruto to inbox", StandardCharsets.UTF_8)),
session).getId();

ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Alex for your help")
.setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -200,7 +201,7 @@ default void searchBothSubjectAndBodyHighLightShouldReturnEntryWhenMatched() thr
assertSoftly(softly -> {
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
softly.assertThat(searchSnippets.getFirst().highlightedBody()).contains("append <mark>Naruto</mark> to inbox");
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx <mark>Naruto</mark> for your help");
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx <mark>Naruto</mark> for your help");
});
}

Expand Down Expand Up @@ -245,7 +246,7 @@ default void highlightSearchShouldReturnCorrectFormatWhenSearchTwoWords() throws
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Naruto Itachi for your help")
.setSubject("Hallo, Thx Naruto Itachi for your help")
.setBody("append Naruto Itachi to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand Down Expand Up @@ -319,7 +320,7 @@ default void highlightSearchShouldNotReturnEntryWhenDoesNotAccessible() throws E
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Matthieu for your help")
.setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -344,7 +345,7 @@ default void highlightSearchShouldReturnEntryWhenHasAccessible() throws Exceptio
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Matthieu for your help")
.setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -364,7 +365,7 @@ default void highlightSearchShouldReturnEntryWhenHasAccessible() throws Exceptio

// Then highlightSearch should return username1 entry
assertThat(searchSnippets).hasSize(1);
assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx <mark>Matthieu</mark> for your help");
assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx <mark>Matthieu</mark> for your help");
}

@Test
Expand All @@ -374,14 +375,14 @@ default void highLightSearchShouldSupportConjunctionCriterionInMultiMessage() th
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Naruto for your help")
.setSubject("Hallo, Thx Naruto for your help")
.setBody("append Naruto to inbox", StandardCharsets.UTF_8)),
session).getId();

ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Alex for your help")
.setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -402,8 +403,8 @@ default void highLightSearchShouldSupportConjunctionCriterionInMultiMessage() th
assertThat(searchSnippets.stream()
.map(SearchSnippet::highlightedSubject)
.toList())
.containsExactlyInAnyOrder(Optional.of("Hallo! Thx <mark>Naruto</mark> for your help"),
Optional.of("Hallo! Thx <mark>Alex</mark> for your help"));
.containsExactlyInAnyOrder(Optional.of("Hallo, Thx <mark>Naruto</mark> for your help"),
Optional.of("Hallo, Thx <mark>Alex</mark> for your help"));
}

@Test
Expand All @@ -413,14 +414,14 @@ default void highLightSearchShouldSupportConjunctionCriterionInSingleMessage() t
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Naruto for your help - Sasuke for your help")
.setSubject("Hallo, Thx Naruto for your help - Sasuke for your help")
.setBody("append Naruto to inbox", StandardCharsets.UTF_8)),
session).getId();

ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
.setSubject("Hallo! Thx Alex for your help")
.setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox", StandardCharsets.UTF_8)),
session).getId();

Expand All @@ -441,6 +442,27 @@ default void highLightSearchShouldSupportConjunctionCriterionInSingleMessage() t
assertThat(searchSnippets.stream()
.map(SearchSnippet::highlightedSubject)
.toList())
.containsExactlyInAnyOrder(Optional.of("Hallo! Thx <mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help"));
.containsExactlyInAnyOrder(Optional.of("Hallo, Thx <mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help"));
}

@Test
default void highLightSearchShouldReturnEmptyWhenMessageIdsIsEmpty() throws Exception {
    // Given a single indexed message whose body would match the query below
    MailboxSession mailboxSession = session(USERNAME1);
    ComposedMessageId indexedMessage = appendMessage(
        MessageManager.AppendCommand.from(Message.Builder.of()
            .setTo("[email protected]")
            .setSubject("Hallo, Thx Naruto Itachi for your help")
            .setBody("append Naruto Itachi to inbox", StandardCharsets.UTF_8)),
        mailboxSession).getId();
    verifyMessageWasIndexed(1);

    // When highlighting is requested for no message id at all
    List<MessageId> noMessageIds = List.of();
    MultimailboxesSearchQuery bodyQuery = MultimailboxesSearchQuery
        .from(SearchQuery.of(SearchQuery.bodyContains("Naruto Itachi")))
        .inMailboxes(List.of(indexedMessage.getMailboxId()))
        .build();
    List<SearchSnippet> snippets = Flux.from(testee().highlightSearch(noMessageIds, bodyQuery, mailboxSession))
        .collectList()
        .block();

    // Then nothing is returned, even though the query itself matches the stored message
    assertThat(snippets).hasSize(0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ public LuceneSearchHighlighter(LuceneMessageSearchIndex luceneMessageSearchIndex

@Override
public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds, MultimailboxesSearchQuery expression, MailboxSession session) {
if (messageIds.isEmpty()) {
return Flux.empty();
}
return storeMailboxManager.getInMailboxIds(expression, session)
.collectList()
.flatMapMany(inMailboxIdsAccessible -> highlightSearch(inMailboxIdsAccessible, expression.getSearchQuery(), messageIds));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MAILBOX_ID;
import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MESSAGE_ID;
import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.UID;
import static org.apache.james.mailbox.opensearch.search.OpenSearchSearcher.SEARCH_HIGHLIGHT;

import java.util.Collection;
import java.util.EnumSet;
Expand Down Expand Up @@ -324,7 +325,7 @@ protected Flux<MessageUid> doSearch(MailboxSession session, Mailbox mailbox, Sea
Preconditions.checkArgument(session != null, "'session' is mandatory");
Optional<Integer> noLimit = Optional.empty();

return searcher.search(ImmutableList.of(mailbox.getMailboxId()), searchQuery, noLimit, UID_FIELD)
return searcher.search(ImmutableList.of(mailbox.getMailboxId()), searchQuery, noLimit, UID_FIELD, !SEARCH_HIGHLIGHT)
.handle(this::extractUidFromHit);
}

Expand All @@ -336,7 +337,7 @@ public Flux<MessageId> search(MailboxSession session, Collection<MailboxId> mail
return Flux.empty();
}

return searcher.search(mailboxIds, searchQuery, Optional.empty(), MESSAGE_ID_FIELD)
return searcher.search(mailboxIds, searchQuery, Optional.empty(), MESSAGE_ID_FIELD, !SEARCH_HIGHLIGHT)
.handle(this::extractMessageIdFromHit)
.distinct()
.take(limit);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ public interface JsonMessageConstants {
String SENT_DATE = "sentDate";
String SAVE_DATE = "saveDate";
String ATTACHMENTS = "attachments";
String TEXT = "text";
String MIME_MESSAGE_ID = "mimeMessageID";
String USER = "user";

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/

package org.apache.james.mailbox.opensearch.search;

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import org.apache.james.mailbox.MailboxSession;
import org.apache.james.mailbox.model.MailboxId;
import org.apache.james.mailbox.model.MessageId;
import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
import org.apache.james.mailbox.model.SearchQuery;
import org.apache.james.mailbox.opensearch.json.JsonMessageConstants;
import org.apache.james.mailbox.searchhighligt.SearchHighlighter;
import org.apache.james.mailbox.searchhighligt.SearchSnippet;
import org.apache.james.mailbox.store.StoreMailboxManager;
import org.opensearch.client.opensearch.core.search.Hit;

import com.fasterxml.jackson.databind.node.ObjectNode;

import reactor.core.publisher.Flux;

/**
 * {@link SearchHighlighter} backed by {@link OpenSearchSearcher}.
 *
 * <p>Runs the caller's search query with highlighting enabled and converts each hit into a
 * {@link SearchSnippet} carrying the first highlighted fragment of the subject and of the
 * text body (falling back to the attachment text content when the body has no highlight).
 */
public class OpenSearchSearchHighlighter implements SearchHighlighter {
    public static final String ATTACHMENT_TEXT_CONTENT_FIELD = JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT;
    /** Fields passed to the searcher when requesting highlighted hits. */
    public static final List<String> SNIPPET_FIELDS = List.of(
        JsonMessageConstants.MESSAGE_ID,
        JsonMessageConstants.SUBJECT,
        JsonMessageConstants.TEXT_BODY,
        ATTACHMENT_TEXT_CONTENT_FIELD);

    private final OpenSearchSearcher openSearchSearcher;
    private final StoreMailboxManager storeMailboxManager;
    private final MessageId.Factory messageIdFactory;

    public OpenSearchSearchHighlighter(OpenSearchSearcher openSearchSearcher, StoreMailboxManager storeMailboxManager, MessageId.Factory messageIdFactory) {
        this.openSearchSearcher = openSearchSearcher;
        this.storeMailboxManager = storeMailboxManager;
        this.messageIdFactory = messageIdFactory;
    }

    /**
     * Returns highlighted snippets for the requested messages.
     *
     * @param messageIds ids of the messages a snippet is wanted for; an empty list yields an empty flux
     * @param expression search query plus mailbox scoping; the mailboxes actually searched are the
     *                   ones {@link StoreMailboxManager#getInMailboxIds} resolves for the session
     * @param session    session of the user performing the search
     * @return at most one snippet per requested message id that matches the query
     */
    @Override
    public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds, MultimailboxesSearchQuery expression, MailboxSession session) {
        if (messageIds.isEmpty()) {
            return Flux.empty();
        }

        // Set lookup keeps the per-hit membership test cheap and ignores duplicated ids in the request.
        Set<MessageId> requestedIds = new HashSet<>(messageIds);

        return storeMailboxManager.getInMailboxIds(expression, session)
            .collectList()
            .flatMapMany(mailboxIds -> highlightSearch(mailboxIds, expression.getSearchQuery(), requestedIds));
    }

    /**
     * Searches the given mailboxes with highlighting enabled and keeps only the snippets of
     * requested messages.
     */
    private Flux<SearchSnippet> highlightSearch(List<MailboxId> mailboxIds, SearchQuery query, Set<MessageId> requestedIds) {
        if (mailboxIds.isEmpty()) {
            // No mailbox to search in: nothing can match.
            return Flux.empty();
        }

        // The search itself is not size-limited: limiting it to the number of requested ids would
        // return the first N hits of the query, which are not necessarily the requested messages.
        // Instead hits are filtered on the requested ids and the stream is cut as soon as every
        // requested message got its snippet.
        Optional<Integer> noLimit = Optional.empty();

        return openSearchSearcher.search(mailboxIds, query, noLimit, SNIPPET_FIELDS, OpenSearchSearcher.SEARCH_HIGHLIGHT)
            .map(this::buildSearchSnippet)
            .filter(snippet -> requestedIds.contains(snippet.messageId()))
            // The same message may be hit several times (one hit per indexed document).
            .distinct(SearchSnippet::messageId)
            .take(requestedIds.size());
    }

    /**
     * Converts a hit into a {@link SearchSnippet}.
     *
     * @throws IllegalStateException when the hit carries no message id field
     */
    private SearchSnippet buildSearchSnippet(Hit<ObjectNode> searchResult) {
        MessageId messageId = Optional.ofNullable(searchResult.fields().get(JsonMessageConstants.MESSAGE_ID))
            .map(jsonData -> jsonData.toJson().asJsonArray().getString(0))
            .map(messageIdFactory::fromString)
            .orElseThrow(() -> new IllegalStateException("Can not extract MessageID for search result: " + searchResult.id()));

        Map<String, List<String>> highlightHit = searchResult.highlight();

        Optional<String> highlightedSubject = firstFragment(highlightHit, JsonMessageConstants.SUBJECT);
        Optional<String> highlightedTextBody = firstFragment(highlightHit, JsonMessageConstants.TEXT_BODY)
            .or(() -> firstFragment(highlightHit, ATTACHMENT_TEXT_CONTENT_FIELD));

        return new SearchSnippet(messageId, highlightedSubject, highlightedTextBody);
    }

    /**
     * Returns the first highlighted fragment of {@code field}, or empty when the field has no
     * entry or an empty fragment list (instead of failing on {@code getFirst()}).
     */
    private static Optional<String> firstFragment(Map<String, List<String>> highlightHit, String field) {
        return Optional.ofNullable(highlightHit.get(field))
            .filter(fragments -> !fragments.isEmpty())
            .map(List::getFirst);
    }
}
Loading

0 comments on commit ab68ea4

Please sign in to comment.