Skip to content

Commit

Permalink
OpenConceptLab/ocl_issues#2023 | Match API updates
Browse files (browse the repository at this point in the history)
snyaggarwal committed Jan 2, 2025
1 parent: 0d445b1 | commit: 59ac15b
Showing 5 changed files with 47 additions and 44 deletions.
5 changes (4 additions & 1 deletion): core/common/search.py
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,10 @@ def to_queryset(self, keep_order=True):
])
else:
pks = [result.meta.id for result in s]
qs = self._dsl_search._model.objects.filter(pk__in=pks) # pylint: disable=protected-access
if len(pks) == 1:
qs = self._dsl_search._model.objects.filter(pk=pks[0]) # pylint: disable=protected-access
else:
qs = self._dsl_search._model.objects.filter(pk__in=pks) # pylint: disable=protected-access
if keep_order:
preserved_order = Case(
*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)],
11 changes (10 additions & 1 deletion): core/common/serializers.py
Original file line number | Diff line number | Diff line change
@@ -171,12 +171,21 @@ def validate_identifier(value):


class SearchResultSerializer(Serializer): # pylint: disable=abstract-method
match_type = CharField(source='_match_type', allow_null=True, allow_blank=True)
search_score = FloatField(source='_score', allow_null=True)
search_confidence = CharField(source='_confidence', allow_null=True, allow_blank=True)
search_highlight = SerializerMethodField()

class Meta:
fields = ('search_score', 'search_confidence', 'search_highlight')
fields = ('search_score', 'search_confidence', 'search_highlight', 'match_type')

def to_representation(self, instance):
rep = super().to_representation(instance)

if not rep.get('match_type', None):
rep.pop('match_type', None)

return rep

@staticmethod
def get_search_highlight(obj):
4 changes (2 additions & 2 deletions): core/concepts/search.py
Original file line number | Diff line number | Diff line change
@@ -79,10 +79,10 @@ def get_exact_and_contains_criteria(field, value, boost=0, add_boost=True):
}))

@classmethod
def search(cls, data, repo_url, include_retired=False): # pylint: disable=too-many-locals, too-many-branches
def search(cls, data, repo_url, repo_params=None, include_retired=False): # pylint: disable=too-many-locals, too-many-branches
from core.concepts.documents import ConceptDocument
search = ConceptDocument.search()
repo_params = cls.get_target_repo_params(repo_url)
repo_params = repo_params or cls.get_target_repo_params(repo_url)
for field, value in repo_params.items():
search = search.query('match', **{field: value})
if not include_retired:
6 changes (3 additions & 3 deletions): core/concepts/serializers.py
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ def __init__(self, *args, **kwargs): # pylint: disable=too-many-branches
self.fields.pop('child_concepts', None)
if not self.include_child_concept_urls:
self.fields.pop('child_concept_urls')
if not self.include_parent_concept_urls and (not get(request, 'method') or get(request, 'method') == 'GET'):
if not self.include_parent_concept_urls:
self.fields.pop('parent_concept_urls')
if not self.include_hierarchy_path:
self.fields.pop('hierarchy_path', None)
@@ -205,13 +205,13 @@ def get_mappings(self, obj):
mappings = obj.get_bidirectional_mappings_for_collection(
parent_uri, collection_version
) if is_collection else obj.get_bidirectional_mappings()
return serializer_class(mappings, many=True, context=context).data
return serializer_class(mappings, many=True).data
if self.include_direct_mappings:
mappings = obj.get_unidirectional_mappings_for_collection(
parent_uri, collection_version) if is_collection else obj.get_unidirectional_mappings()
if map_types:
mappings = mappings.filter(map_type__in=map_types)
return serializer_class(mappings, many=True, context=context).data
return serializer_class(mappings, many=True).data

return []

65 changes (28 additions & 37 deletions): core/concepts/views.py
Original file line number | Diff line number | Diff line change
@@ -746,8 +746,9 @@ def post(request):
)


class MetadataToConceptsListView(BaseAPIView, ListWithHeadersMixin): # pragma: no cover
default_limit = 5
class MetadataToConceptsListView(BaseAPIView):
default_limit = 1
score_threshold = 6
serializer_class = ConceptListSerializer
permission_classes = (IsAuthenticatedOrReadOnly,)

@@ -760,45 +761,35 @@ def get_serializer_class(self):
return ConceptListSerializer

def filter_queryset(self, _=None):
row = self.request.data.get('row')
rows = self.request.data.get('rows')
target_repo_url = self.request.data.get('target_repo_url')
target_repo_params = self.request.data.get('target_repo')
include_retired = self.request.query_params.get(INCLUDE_RETIRED_PARAM) in get_truthy_values()
if not row or not target_repo_url:
if not rows or (not target_repo_url and not target_repo_params):
raise Http400()
offset = max(to_int(self.request.GET.get('offset'), 0), 0)
self.limit = int(self.limit) or self.default_limit
limit = max(to_int(self.request.GET.get('limit'), 0), 0) or self.default_limit
page = max(to_int(self.request.GET.get('page'), 1), 1)
start = offset or (page - 1) * self.limit
end = start + self.limit

search = ConceptFuzzySearch.search(row, target_repo_url, include_retired)
es_search = CustomESSearch(search[start:end], ConceptDocument)
es_search.to_queryset()
self.total_count = es_search.total - offset
self._scores = es_search.scores
self._max_score = es_search.max_score
self._highlights = es_search.highlights
return es_search.queryset
start = offset or (page - 1) * limit
end = start + limit
results = []
for row in rows:
search = ConceptFuzzySearch.search(row, target_repo_url, target_repo_params, include_retired)
es_search = CustomESSearch(search[start:end], ConceptDocument)
es_search.to_queryset(False)
result = {'row': row, 'results': []}
for concept in es_search.queryset:
concept._highlight = es_search.highlights.get(concept.id, {})
concept._score = es_search.scores.get(concept.id, {})
concept._match_type = 'low'
if concept._score > self.score_threshold:
concept._match_type = 'high'
if concept._highlight.get('name', None):
concept._match_type = 'very_high'
result['results'].append(ConceptMinimalSerializer(concept, context={'request': self.request}).data)
results.append(result)

return results

def post(self, request, **kwargs): # pylint: disable=unused-argument
self.limit = self.default_limit
self.object_list = self.filter_queryset()
sorted_list = self.object_list

if not self.limit or int(self.limit) == 0 or int(self.limit) > 1000:
self.limit = self.default_limit
paginator = CustomPaginator(
request=request, queryset=self.object_list, page_size=self.limit, total_count=self.total_count,
is_sliced=True, max_score=get(self, '_max_score'),
search_scores=get(self, '_scores'), highlights=get(self, '_highlights')
)
headers = paginator.headers
results = paginator.current_page_results
data = self.serialize_list(results, paginator)

response = Response(data)
for key, value in headers.items():
response[key] = value
if not headers:
response['num_found'] = len(sorted_list)
return response
return Response(self.filter_queryset())

0 comments on commit 59ac15b

Please sign in to comment.