Skip to content

Commit

Permalink
Modified search to take in multiple strings (#4650)
Browse files Browse the repository at this point in the history
* modified search to take in multiple strings

* style: pre-commit fixes

* added tests

* added changelog entry

* modified docstring

* modified search to handle partial matches

* modified tests

* minor refactoring

* added tests

* modified to handle empty inputs

* fixed test coverage

* fixed original_keys

* minor changes

* added annotations

* result formatting

* error message formatting

* style: pre-commit fixes

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Eric G. Kratz <[email protected]>
Co-authored-by: Agriya Khetarpal <[email protected]>
  • Loading branch information
4 people authored Dec 26, 2024
1 parent e51778a commit a1f73b6
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 32 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Features

- Enhanced the `search` method to accept multiple search terms in the form of a string or a list. ([#4650](https://github.com/pybamm-team/PyBaMM/pull/4650))
- Made composite electrode model compatible with particle size distribution ([#4687](https://github.com/pybamm-team/PyBaMM/pull/4687))
- Added `Symbol.post_order()` method to return an iterable that steps through the tree in post-order fashion. ([#4684](https://github.com/pybamm-team/PyBaMM/pull/4684))
- Added two more submodels (options) for the SEI: Lars von Kolzenberg (2020) model and Tunneling Limit model ([#4394](https://github.com/pybamm-team/PyBaMM/pull/4394))
Expand Down
111 changes: 85 additions & 26 deletions src/pybamm/util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import annotations
import importlib.util
import importlib.metadata
import numbers
Expand Down Expand Up @@ -108,38 +109,96 @@ def __getitem__(self, key):
f"'{key}' not found. Best matches are {best_matches}"
) from error

def search(self, key, print_values=False):
def _find_matches(self, search_key: str, known_keys: list[str]):
    """
    Helper method to find exact and partial matches for a given search key.

    Parameters
    ----------
    search_key : str
        The term to search for in the keys.
    known_keys : list of str
        The list of known dictionary keys to search within.

    Returns
    -------
    exact : list of str
        Keys from ``known_keys`` that contain ``search_key`` as a
        case-insensitive substring, in the order given.
    partial : list of str
        Up to five keys from ``known_keys`` that ``difflib`` considers close
        to ``search_key`` (similarity cutoff 0.5), best match first.
    """
    # Lower-case the term here so the substring test is case-insensitive even
    # when the caller has not normalised it; this is a no-op for terms that
    # are already lower case.
    needle = search_key.lower()
    exact = [key for key in known_keys if needle in key.lower()]
    partial = difflib.get_close_matches(search_key, known_keys, n=5, cutoff=0.5)
    return exact, partial

if results == {}:
# If no results, return best matches
best_matches = self.get_best_matches(key)
def search(self, keys: str | list[str], print_values: bool = False):
"""
Search dictionary for keys containing all terms in 'keys'.
If print_values is True, both the keys and values will be printed.
Otherwise, just the keys will be printed. If no results are found,
the best matches are printed.
Parameters
----------
keys : str or list of str
Search term(s)
print_values : bool, optional
If True, print both keys and values. Otherwise, print only keys.
Default is False.
"""

if not isinstance(keys, (str, list)) or not all(
isinstance(k, str) for k in keys
):
msg = f"'keys' must be a string or a list of strings, got {type(keys)}"
raise TypeError(msg)

if isinstance(keys, str):
if not keys.strip():
msg = "The search term cannot be an empty or whitespace-only string"
raise ValueError(msg)
original_keys = [keys]
search_keys = [keys.strip().lower()]

elif isinstance(keys, list):
if all(not str(k).strip() for k in keys):
msg = "The 'keys' list cannot contain only empty or whitespace strings"
raise ValueError(msg)

original_keys = keys
search_keys = [k.strip().lower() for k in keys if k.strip()]

known_keys = list(self.keys())
known_keys.sort()

# Check for exact matches where all search keys appear together in a key
exact_matches = [
key
for key in known_keys
if all(term in key.lower() for term in search_keys)
]

if exact_matches:
print(
f"No results for search using '{key_in}'. "
f"Best matches are {best_matches}"
f"Results for '{' '.join(k for k in original_keys if k.strip())}': {exact_matches}"
)
elif print_values:
# Else print results, including dict items
print("\n".join(f"{k}\t{v}" for k, v in results.items()))
else:
# Just print keys
print("\n".join(f"{k}" for k in results.keys()))
if print_values:
for match in exact_matches:
print(f"{match} -> {self[match]}")
return

# If no exact matches, iterate over search keys individually
for original_key, search_key in zip(original_keys, search_keys):
exact_key_matches, partial_matches = self._find_matches(
search_key, known_keys
)

if exact_key_matches:
print(f"Exact matches for '{original_key}': {exact_key_matches}")
if print_values:
for match in exact_key_matches:
print(f"{match} -> {self[match]}")
else:
if partial_matches:
print(
f"No exact matches found for '{original_key}'. Best matches are: {partial_matches}"
)
else:
print(f"No matches found for '{original_key}'")

def copy(self):
return FuzzyDict(super().copy())
Expand Down
77 changes: 71 additions & 6 deletions tests/unit/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,19 +188,84 @@ def test_url_gets_to_stdout(self, mocker):
# Test variables search (default returns key)
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search("Electrode")
assert fake_out.getvalue() == "Electrode potential\n"

assert (
fake_out.getvalue()
== "Results for 'Electrode': ['Electrode potential']\n"
)
# Test bad var search (returns best matches)
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search("Electrolyte cot")
out = (
"No results for search using 'Electrolyte cot'. "
"Best matches are ['Electrolyte concentration', "
"'Electrode potential']\n"
"No exact matches found for 'Electrolyte cot'. "
"Best matches are: ['Electrolyte concentration', 'Electrode potential']\n"
)
assert fake_out.getvalue() == out

# Test for multiple strings as input where one key contains every term (print_values=True prints keys and values)
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search(["Electrolyte", "Concentration"], print_values=True)
assert (
fake_out.getvalue()
== "Results for 'Electrolyte Concentration': ['Electrolyte concentration']\n"
"Electrolyte concentration -> 1\n"
)

# Test for multiple strings as input where no key contains every term (falls back to per-term exact/best matches)
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search(["Electrolyte", "Potenteel"], print_values=True)
out = (
"Exact matches for 'Electrolyte': ['Electrolyte concentration']\n"
"Electrolyte concentration -> 1\n"
"No exact matches found for 'Potenteel'. Best matches are: ['Electrode potential']\n"
)
assert fake_out.getvalue() == out

# Test param search (default returns key, value)
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
param.search("test")
assert fake_out.getvalue() == "test\t10\n"
out = "Results for 'test': ['test']\n" "test -> 10\n"
assert fake_out.getvalue() == out

# Test no matches and no best matches
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search("NonexistentKey")
assert fake_out.getvalue() == "No matches found for 'NonexistentKey'\n"

# Test print_values=True with a single term that is a substring of a key
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search("Electrolyte", print_values=True)
out = (
"Results for 'Electrolyte': ['Electrolyte concentration']\n"
"Electrolyte concentration -> 1\n"
)
assert fake_out.getvalue() == out

# Test for empty string input (raises ValueError)
with pytest.raises(
ValueError,
match="The search term cannot be an empty or whitespace-only string",
):
model.variables.search("", print_values=True)

# Test for list with all empty strings (raises ValueError)
with pytest.raises(
ValueError,
match="The 'keys' list cannot contain only empty or whitespace strings",
):
model.variables.search(["", " ", "\t"], print_values=True)

# Test for list with a mix of empty and valid strings
with mocker.patch("sys.stdout", new=StringIO()) as fake_out:
model.variables.search(["", "Electrolyte"], print_values=True)
out = (
"Results for 'Electrolyte': ['Electrolyte concentration']\n"
"Electrolyte concentration -> 1\n"
)
assert fake_out.getvalue() == out

# Test invalid input type
with pytest.raises(
TypeError,
match="'keys' must be a string or a list of strings, got <class 'int'>",
):
model.variables.search(123)

0 comments on commit a1f73b6

Please sign in to comment.