Skip to content

Commit

Permalink
Improved the Search Algorithm (#5)
Browse files Browse the repository at this point in the history
* Testing search method

* Fixed the search algorithm

* Incremented the patch

* Added more test cases

* Added a comment
  • Loading branch information
jrg94 authored Mar 21, 2022
1 parent cc79264 commit cd79029
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 69 deletions.
72 changes: 3 additions & 69 deletions code_bot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import string

from code_bot_utils import *

import discord
from discord import Message
Expand All @@ -11,74 +12,7 @@
from dotenv import load_dotenv


# Version string of this module.
__version__ = "0.1.0"


# Helper methods
def generate_keyword_mapping(queries: list) -> dict:
    """
    Creates a mapping of keywords to queries.

    Keywords are taken from both the "query" and the "response" text of
    each entry. Entries without a (truthy) "query" are skipped entirely;
    a missing or empty "response" contributes no keywords instead of
    raising.

    :param queries: a list of queries with responses
    :return: a dictionary of keywords to lists of query indices; an index
        is appended once per occurrence of the keyword in that entry
    """
    keyword_to_queries = dict()
    for i, question in enumerate(queries):
        query = question.get("query")
        if not query:
            continue
        keywords = generate_keywords(query)
        # Guard the optional field: passing None on would raise.
        response = question.get("response")
        if response:
            keywords.extend(generate_keywords(response))
        for keyword in keywords:
            keyword_to_queries.setdefault(keyword, []).append(i)
    return keyword_to_queries


def generate_keywords(query: str) -> list:
    """
    Create a list of keywords from a query.

    Punctuation is stripped, the text is lower-cased and split on any run
    of whitespace (spaces, tabs, newlines), and stop words are dropped.

    :param query: a search query; an empty or missing query yields no keywords
    :return: the list of keywords from that query, in order of appearance
    """
    if not query:
        return []
    # split() with no separator never yields empty strings, so the empty
    # string does not need to be listed as a stop word.
    stop_words = {"is", "a", "the", "can",
                  "i", "to", "in", "by", "from", "be", "of"}
    cleaned = query \
        .translate(str.maketrans('', '', string.punctuation)) \
        .lower()
    return [word for word in cleaned.split() if word not in stop_words]


def search(keyword_to_queries: dict, keywords: list) -> list:
    """
    Looks up the list of queries that satisfy a keyword.

    Each query index is scored by how many times it is listed under the
    given keywords. Results are ordered best match first; indices with
    equal scores keep the order in which they were first encountered.

    :param keyword_to_queries: a mapping of keywords to query indices
    :param keywords: a list of keywords to lookup
    :return: a list of query indices, highest score first
    """
    query_count = dict()
    for keyword in keywords:
        for i in keyword_to_queries.get(keyword, []):
            query_count[i] = query_count.get(i, 0) + 1
    # reverse=True puts the strongest match at the front; sorted() is
    # stable, so ties stay in first-seen order.
    return sorted(query_count, key=query_count.get, reverse=True)


def create_md_link(url: str, text: str) -> str:
    """
    Creates a markdown link.

    :param url: the url to link to; when empty or None no link is created
    :param text: the text to display
    :return: the markdown link, or the plain text when there is no url
    """
    if url:
        return f"[{text}]({url})"
    return text
# Version string of this module.
__version__ = "0.1.1"


# Global variables
Expand Down
73 changes: 73 additions & 0 deletions code_bot_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import string

def generate_keyword_mapping(queries: list) -> dict:
    """
    Creates a weighted mapping of keywords to queries.

    Every occurrence of a keyword in an entry's "query" text adds 10 to
    that entry's weight for the keyword; every occurrence in its
    "response" text adds 1. Entries without a (truthy) "query" are skipped
    entirely; a missing or empty "response" contributes nothing instead of
    raising.

    :param queries: a list of queries with responses
    :return: a dictionary of keywords to {query index: weight} dictionaries
    """
    # Query matches outrank response matches.
    weighted_fields = (("query", 10), ("response", 1))
    keyword_to_queries = dict()
    for i, question in enumerate(queries):
        if not question.get("query"):
            continue
        for field, weight in weighted_fields:
            text = question.get(field)
            if not text:
                # Guard the optional field: passing None on would raise.
                continue
            for keyword in generate_keywords(text):
                weights = keyword_to_queries.setdefault(keyword, {})
                weights[i] = weights.get(i, 0) + weight
    return keyword_to_queries


def generate_keywords(query: str) -> list:
    """
    Create a list of keywords from a query.

    Punctuation is stripped, the text is lower-cased and split on any run
    of whitespace (spaces, tabs, newlines), and stop words are dropped.

    :param query: a search query; an empty or missing query yields no keywords
    :return: the list of keywords from that query, in order of appearance
    """
    if not query:
        return []
    # split() with no separator never yields empty strings, so the empty
    # string does not need to be listed as a stop word.
    stop_words = {"is", "a", "the", "can",
                  "i", "to", "in", "by", "from", "be", "of",
                  "what", "where", "when", "why", "how", "which"}
    cleaned = query \
        .translate(str.maketrans('', '', string.punctuation)) \
        .lower()
    return [word for word in cleaned.split() if word not in stop_words]


def search(keyword_to_queries: dict, keywords: list) -> list:
    """
    Ranks the queries that match a list of keywords.

    The score of a query index is the sum of the weights recorded for it
    under each given keyword; keywords absent from the mapping add nothing.

    :param keyword_to_queries: a mapping of keywords to {query index: weight}
    :param keywords: a list of keywords to lookup
    :return: a list of query indices, highest total weight first
    """
    scores = {}
    for term in keywords:
        for index, weight in keyword_to_queries.get(term, {}).items():
            scores[index] = scores.get(index, 0) + weight
    # Stable sort: indices with equal scores stay in first-seen order.
    ranked = sorted(scores, key=scores.get, reverse=True)
    return ranked


def create_md_link(url: str, text: str) -> str:
    """
    Creates a markdown link.

    :param url: the url to link to; when empty or None no link is created
    :param text: the text to display
    :return: the markdown link, or the plain text when there is no url
    """
    if url:
        return f"[{text}]({url})"
    return text
36 changes: 36 additions & 0 deletions test_code_bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import json
from code_bot_utils import *

# Load the query fixture once for all tests. The context manager closes the
# file handle, and the explicit encoding keeps decoding independent of the
# platform default.
with open("queries.json", encoding="utf-8") as queries_file:
    queries = json.load(queries_file)
keyword_mapping = generate_keyword_mapping(queries)

def top_queries(top_ids):
    """Return the "query" text of each entry in ``queries`` named by top_ids."""
    texts = []
    for idx in top_ids:
        texts.append(queries[idx]["query"])
    return texts

def test_search_exact_match_first():
    """Searching with the exact words of entry 1 ranks that entry first."""
    words = ["what", "is", "a", "magic", "number"]
    ranked = search(keyword_mapping, words)
    top_ids = ranked[:1]
    assert 1 in top_ids
    assert "What is a magic number?" in top_queries(top_ids)

def test_search_exact_match_middle():
    """Searching with the exact words of entry 12 ranks that entry first."""
    words = ["what", "is", "method", "overriding"]
    ranked = search(keyword_mapping, words)
    top_ids = ranked[:1]
    assert 12 in top_ids
    assert "What is method overriding?" in top_queries(top_ids)

def test_search_routine_match():
    """A loosely worded question still puts entry 14 in the top three."""
    words = ["what", "does", "implements", "mean"]
    ranked = search(keyword_mapping, words)
    top_ids = ranked[:3]
    assert 14 in top_ids
    assert "What is the implements relationship?" in top_queries(top_ids)

def test_generate_keyword_mapping():
    """A single entry maps each of its keywords to {index: weight}."""
    entries = [{
        "query": "How now brown cow?",
        "response": "The cow is brown."
    }]
    # keyword -> {entry index: weight}
    expected = {"brown": {0: 11}, "cow": {0: 11}, "now": {0: 10}}
    assert generate_keyword_mapping(entries) == expected

0 comments on commit cd79029

Please sign in to comment.