Skip to content

Commit

Permalink
refactor(omnisearch): Add facets to search interface
Browse files: browse the repository at this point in the history
  • Loading branch information
scruwys authored and Scott Cruwys committed Nov 17, 2020
1 parent 9b31ecd commit 987e0fb
Show file tree
Hide file tree
Showing 29 changed files with 725 additions and 245 deletions.
5 changes: 3 additions & 2 deletions app/comments/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,9 @@ def as_search_result(self):
def to_doc(self):
return {
'pk': str(self.id),
'workspace_id': self.workspace.id,
'datastore_id': self.content_object.datastore_id,
'workspace_id': self.workspace_id,
'datastore_id': self.content_object.datastore.id,
'datastore_engine': self.content_object.datastore.engine,
'text': self.text,
}

Expand Down
14 changes: 8 additions & 6 deletions app/definitions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,12 +379,13 @@ class Meta:
def to_doc(self):
return {
'pk': str(self.id),
'workspace_id': self.workspace.id,
'datastore_id': self.schema.datastore.id,
'datastore': self.schema.datastore.name,
'workspace_id': self.workspace_id,
'datastore_id': self.schema.datastore_id,
'datastore_engine': self.schema.datastore.engine,
'schema': self.schema.name,
'name': self.name,
'description': self.short_desc,
'tags': self.tags,
}

@property
Expand Down Expand Up @@ -544,13 +545,14 @@ def as_search_result(self):
def to_doc(self):
return {
'pk': str(self.id),
'workspace_id': self.table.workspace.id,
'datastore_id': self.table.schema.datastore.id,
'datastore': self.table.schema.datastore.name,
'workspace_id': self.table.workspace_id,
'datastore_id': self.table.schema.datastore_id,
'datastore_engine': self.table.schema.datastore.engine,
'schema': self.table.schema.name,
'table': self.table.name,
'name': self.name,
'description': self.short_desc,
'tags': self.table.tags,
}

@property
Expand Down
4 changes: 2 additions & 2 deletions app/omnisearch/backends/base_search_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ class BaseSearchBackend(metaclass=abc.ABCMeta):
needs to return a list of dictionaries or similar iterable of objects that support __getitem__.
"""

def __init__(self, workspace, user):
def __init__(self, workspace, user, **kwargs):
self.workspace = workspace
self.user = user

@abc.abstractmethod
def search(self, search_query_string, **extra_filters):
def execute(self, query, types=None, datastores=None, start=0, size=100, **facets):
""" Search the backend with a given query string. This needs to return the following signature:
[
Expand Down
83 changes: 65 additions & 18 deletions app/omnisearch/backends/elastic_backend.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# -*- coding: utf-8 -*-
import collections
import time

from django.conf import settings

import app.omnisearch.backends.base_search_backend as base

from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk

from elasticsearch_dsl import Search
from elasticsearch_dsl import Search, A

import app.definitions.permissions as definition_permissions
import app.definitions.models as definition_models
Expand All @@ -24,6 +26,13 @@ class ElasticBackend(base.BaseSearchBackend):
'comment': 'Comment',
}

ALLOWED_FACET_MAP = {
'datastores': 'datastore_id',
'datastore_engines': 'datastore_engine',
'schemas': 'schema.keyword',
'tags': 'tags.keyword',
}

client = None

@classmethod
Expand All @@ -37,11 +46,21 @@ def __new__(cls, *args, **kwargs):
if cls.client is None:
cls.client = cls.create_client()
instance = super().__new__(cls)

return instance

def to_dict(self):
return {
'elapsed': self.elapsed,
'results': self.results,
'facets': self.facets,
}

@property
def possible_indexes(self):
return list(self.INDEX_MODEL_MAP.keys())

def user_permission_ids(self):
""" Returns workspace_id and datastore_ids that a user can view.
"""Returns workspace_id and datastore_ids that a user can view.
"""
datastores = definition_models.Datastore.objects.filter(workspace=self.workspace)
datastores = definition_permissions.get_datastores_for_user(datastores, self.user)
Expand All @@ -54,21 +73,21 @@ def bulk_insert(self, actions, as_list=True, **kwargs):
return list(response)
return response

def search(self, search_query_string, datastore_id=None, start=0, size=100, **extra_filters):
def execute(self, query, types=None, datastores=None, start=0, size=100, **facets):
workspace_id, datastore_ids = self.user_permission_ids()

if not datastore_ids:
# User has no access to any datastores.
return []

if datastore_id and datastore_id not in datastore_ids:
# User is requesting a datastore they don't have access to.
return []
# Filter out datastores that the User does not have access to.
if datastores:
datastore_ids = [d for d in datastores if d in datastore_ids]

elif datastore_id:
datastore_ids = [datastore_id]
index = [t for t in (types or []) if t in self.possible_indexes]

s = Search(using=self.client)
t = time.time()
s = Search(index=index, using=self.client)
s = s.query(
'multi_match',
type='phrase_prefix',
Expand All @@ -77,21 +96,49 @@ def search(self, search_query_string, datastore_id=None, start=0, size=100, **ex
'table',
'description',
'name^1.1',
'text^1.1'
'text^1.1',
],
query=search_query_string,
query=query,
).filter(
'term',
workspace_id=workspace_id,
).filter(
'terms',
datastore_id=datastore_ids,
)
s = s.filter('term', workspace_id=workspace_id)
s = s.filter('terms', datastore_id=datastore_ids)

results = s[start:start + size]
for facet_name, es_field in self.ALLOWED_FACET_MAP.items():
# Register facet in Elasticsearch query
s.aggs.bucket(facet_name, A('terms', field=es_field))

return [
value = facets.get(facet_name)

if value:
s = s.filter('terms', **{es_field: value})

results = s.execute()

self._results = [
{
'pk': hit.pk,
'model_name': self.INDEX_MODEL_MAP[hit.meta.index],
'score': hit.meta.score / results._response.hits.max_score,
'score': hit.meta.score / results.hits.max_score,
'datastore_id': hit.datastore_id,
}
for hit in results
for hit in results.hits[start:start + size]
]

self._facets = results.aggs
self._elapsed = round(time.time() - t, 3)

@property
def results(self):
return self._results

@property
def facets(self):
return self._facets.to_dict()

@property
def elapsed(self):
return self._elapsed
75 changes: 57 additions & 18 deletions app/omnisearch/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
},
'custom_english_stop': {
'type': 'stop',
'stopwords': ENGLISH_STOPWORDS
'stopwords': ENGLISH_STOPWORDS,
}
},
'tokenizer': {
Expand All @@ -160,13 +160,8 @@
'datastore_id': {
'type': 'keyword',
},
'datastore': {
'type': 'text',
'analyzer': 'slug_case_split',
},
'schema': {
'type': 'text',
'analyzer': 'slug_case_split',
'datastore_engine': {
'type': 'keyword',
},
'name': {
'type': 'text',
Expand All @@ -176,6 +171,29 @@
'type': 'text',
'analyzer': 'custom_english_stop',
},
'schema': {
'type': 'text',
'fields': {
'keyword': {
'type': 'keyword',
},
'text': {
'type': 'text',
'analyzer': 'slug_case_split',
}
}
},
'tags': {
'type': 'text',
'fields': {
'keyword': {
'type': 'keyword',
},
'text': {
'type': 'text',
}
}
},
}
}
}
Expand Down Expand Up @@ -204,21 +222,16 @@
'mappings': {
'properties': {
'pk': {
'type': 'keyword'
'type': 'keyword',
},
'workspace_id': {
'type': 'keyword'
'type': 'keyword',
},
'datastore_id': {
'type': 'keyword'
},
'datastore': {
'type': 'text',
'analyzer': 'slug_case_split',
'type': 'keyword',
},
'schema': {
'type': 'text',
'analyzer': 'slug_case_split',
'datastore_engine': {
'type': 'keyword',
},
'table': {
'type': 'text',
Expand All @@ -232,6 +245,29 @@
'type': 'text',
'analyzer': 'custom_english_stop',
},
'schema': {
'type': 'text',
'fields': {
'keyword': {
'type': 'keyword',
},
'text': {
'type': 'text',
'analyzer': 'slug_case_split',
}
}
},
'tags': {
'type': 'text',
'fields': {
'keyword': {
'type': 'keyword',
},
'text': {
'type': 'text',
}
}
},
}
}
}
Expand Down Expand Up @@ -259,6 +295,9 @@
'datastore_id': {
'type': 'keyword',
},
'datastore_engine': {
'type': 'keyword',
},
'text': {
'type': 'text',
'analyzer': 'custom_english_stop',
Expand Down
23 changes: 9 additions & 14 deletions app/omnisearch/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,17 @@ class Query(graphene.ObjectType):
omnisearch = graphene.Field(
schema.OmnisearchResponseType,
content=graphene.String(required=True),
datastore_id=graphene.String(required=False),
types=graphene.List(graphene.String, required=False),
datastores=graphene.List(graphene.String, required=False),
engines=graphene.List(graphene.String, required=False),
schemas=graphene.List(graphene.String, required=False),
tags=graphene.List(graphene.String, required=False),
)

@permissions.permissions_required(permission_classes=(permissions.WorkspaceTeamMembersOnly,))
def resolve_omnisearch(self, info, content, datastore_id=None, **kwargs):
def resolve_omnisearch(self, info, content, **extras):
"""Execute a search query and return a result.
"""
filter_kwargs = {
'search_query_string': content,
'workspace': info.context.workspace,
}

if datastore_id:
filter_kwargs['datastore_id'] = datastore_id

start_t = time.time()
results = get_search_backend(info.context.workspace, info.context.user).search(**filter_kwargs)

return dict(search_results=results, time_elapsed=round(time.time() - start_t, 3))
omnisearch = get_search_backend(info.context.workspace, info.context.user)
omnisearch.execute(content, **extras)
return omnisearch.to_dict()
8 changes: 6 additions & 2 deletions app/omnisearch/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
import graphene

from graphene.types.generic import GenericScalar


class OmnisearchResultType(graphene.ObjectType):
"""GraphQL representation of an individual search result.
Expand Down Expand Up @@ -47,6 +49,8 @@ def resolve_model_name(instance, info):
class OmnisearchResponseType(graphene.ObjectType):
"""Parent response type for all omnisearch results.
"""
search_results = graphene.List(OmnisearchResultListType)
results = graphene.List(OmnisearchResultListType)

facets = graphene.Field(GenericScalar)

time_elapsed = graphene.Float()
elapsed = graphene.Float()
Loading

0 comments on commit 987e0fb

Please sign in to comment.