-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Testing search method * Fixed the search algorithm * Incremented the patch * Added more test cases * Added a comment
- Loading branch information
Showing
3 changed files
with
112 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import string | ||
|
||
def generate_keyword_mapping(queries: list) -> dict:
    """
    Creates a mapping of keywords to queries.

    Every keyword found in an entry's query adds 10 to that entry's weight
    for the keyword; every keyword found in its response adds 1. Entries
    whose query is missing or empty are skipped.

    :param queries: a list of queries with responses
    :return: a dictionary of keywords to {query index: weight}
    """
    keyword_to_queries = dict()
    for i, question in enumerate(queries):
        query = question.get("query")
        if not query:
            continue
        # A missing/null response contributes no keywords instead of
        # raising inside generate_keywords.
        response = question.get("response") or ""
        # Query text is processed first so insertion order is unchanged.
        for text, weight in ((query, 10), (response, 1)):
            for keyword in generate_keywords(text):
                weights = keyword_to_queries.setdefault(keyword, {})
                weights[i] = weights.get(i, 0) + weight
    return keyword_to_queries
|
||
|
||
def generate_keywords(query: str) -> list:
    """
    Create a list of keywords from a query.

    Punctuation is removed, the text is lower-cased and split on any run of
    whitespace, and stop words are dropped. The order of the remaining words
    and any duplicates are preserved.

    :param query: a search query; None or an empty string yields no keywords
    :return: the list of keywords from that query
    """
    if not query:
        return []
    # A set gives constant-time membership tests.
    stop_words = {"is", "a", "the", "can",
                  "i", "to", "in", "by", "from", "be", "of",
                  "what", "where", "when", "why", "how", "which"}
    # split() with no argument also handles tabs/newlines and never
    # produces empty strings, so no "" stop word is needed.
    keywords = query \
        .translate(str.maketrans('', '', string.punctuation)) \
        .lower() \
        .split()
    return [word for word in keywords if word not in stop_words]
|
||
|
||
def search(keyword_to_queries: dict, keywords: list) -> list:
    """
    Ranks query indices by their total weight over the given keywords.

    :param keyword_to_queries: a mapping of keywords to {query index: weight}
    :param keywords: a list of keywords to lookup
    :return: query indices ordered from highest to lowest total weight
    """
    totals = {}
    for word in keywords:
        # Keywords absent from the mapping simply contribute nothing.
        for index, weight in keyword_to_queries.get(word, {}).items():
            totals[index] = totals.get(index, 0) + weight
    # sorted() is stable, so indices with equal totals keep first-seen order.
    return sorted(totals, key=totals.get, reverse=True)
|
||
|
||
def create_md_link(url: str, text: str) -> str:
    """
    Creates a markdown link.

    :param url: the url to link to; when empty or None no link is produced
    :param text: the text to display
    :return: the markdown link, or just the text when there is no url
    """
    if not url:
        return text
    return f"[{text}]({url})"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import json | ||
from code_bot_utils import * | ||
|
||
# Load the question/answer fixture once and build the keyword index from it.
# The context manager closes the file handle as soon as parsing is done.
with open("queries.json", encoding="utf-8") as queries_file:
    queries = json.load(queries_file)
keyword_mapping = generate_keyword_mapping(queries)
|
||
def top_queries(top_ids):
    """Return the "query" text of each entry of ``queries`` indexed by top_ids, in order."""
    texts = []
    for index in top_ids:
        texts.append(queries[index]["query"])
    return texts
|
||
def test_search_exact_match_first():
    """Searching with the words of "What is a magic number?" ranks index 1 first."""
    words = ["what", "is", "a", "magic", "number"]
    best = search(keyword_mapping, words)[:1]
    assert 1 in best
    assert "What is a magic number?" in top_queries(best)
|
||
def test_search_exact_match_middle():
    """Searching with the words of "What is method overriding?" ranks index 12 first."""
    words = ["what", "is", "method", "overriding"]
    best = search(keyword_mapping, words)[:1]
    assert 12 in best
    assert "What is method overriding?" in top_queries(best)
|
||
def test_search_routine_match():
    """A loosely worded question about "implements" has index 14 among the top three hits."""
    words = ["what", "does", "implements", "mean"]
    best_three = search(keyword_mapping, words)[:3]
    assert 14 in best_three
    assert "What is the implements relationship?" in top_queries(best_three)
|
||
def test_generate_keyword_mapping():
    """Query keywords weigh 10 and response keywords weigh 1 for a single entry."""
    entry = {
        "query": "How now brown cow?",
        "response": "The cow is brown."
    }
    # keyword -> {index: weight}; "brown" and "cow" appear in both texts.
    expected = {
        "brown": {0: 11},
        "cow": {0: 11},
        "now": {0: 10}
    }
    actual = generate_keyword_mapping([entry])
    assert actual == expected