diff --git a/benchmark/data/benchmark_api_calling_data.yaml b/benchmark/data/benchmark_api_calling_data.yaml index 85340a81..c890fca4 100644 --- a/benchmark/data/benchmark_api_calling_data.yaml +++ b/benchmark/data/benchmark_api_calling_data.yaml @@ -73,3 +73,29 @@ api_calling: expected: parts_of_query: ["https://bio.tools/api/t/", "\\?topic=", "[mM]etabolomics"] + - case: brapi:germplasm:poc + input: + prompt: + fuzzy_search: "What germplasms belong to the Vitis genus?" + expected: + parts_of_query: + [ + "https://urgi.versailles.inrae.fr/faidare/brapi/v1/", + "germplasm", + "\\?genus", + "Vitis", + ] + - case: brapi:germplasm:genusAndSpecies + input: + prompt: + fuzzy_search: "What germplasms belong to the Vitis genus and species vinifera?" + expected: + parts_of_query: + [ + "https://urgi.versailles.inrae.fr/faidare/brapi/v1/germplasm", + "\\?genus", + "Vitis", + "\\&species", + "vinifera", + ] + diff --git a/benchmark/results/api_calling.csv b/benchmark/results/api_calling.csv index 20083fc1..fa5d7237 100644 --- a/benchmark/results/api_calling.csv +++ b/benchmark/results/api_calling.csv @@ -19,6 +19,7 @@ gpt-4o-2024-05-13,oncokb:braf:histiocytosis:exact_spelling,4;4;4/4,3,f7e5a41f8fb gpt-4o-2024-05-13,oncokb:braf:melanoma:exact_spelling,4;4;4/4,3,b52eb44672033de81ec2213895507706,2024-07-18 12:27:07,0.4.13 gpt-4o-2024-05-13,oncokb:ros1:lung_adenocarcinoma:exact_spelling,3;3;3/6,3,a453756cc6dd549acb390cdf6108eddf,2024-07-18 12:27:35,0.4.13 gpt-4o-2024-05-13,oncokb:tp53:colon_adenocarcinoma:exact_spelling,2;2;2/3,3,72f23eb51d35737c7a446180582488ab,2024-07-18 12:27:13,0.4.13 +gpt-4o-2024-08-06,brapi:germplasm:poc:fuzzy_search,4/4,1,a906498221d9ee3fddd857529a7f6af8,2024-11-14 14:16:09,0.7.5 gpt-4o-mini-2024-07-18,oncokb:braf:histiocytosis:descriptive_spelling,3;3;3;3;3/4,5,a52dc4ba2dd21ff2aa53654c3f26b2c5,2024-07-31 00:14:55,0.5.1 gpt-4o-mini-2024-07-18,oncokb:braf:histiocytosis:exact_spelling,4;4;4;4;4/4,5,f7e5a41f8fb5eb520571bc46c8e4916c,2024-07-31 00:14:48,0.5.1 
gpt-4o-mini-2024-07-18,oncokb:braf:melanoma:exact_spelling,4;4;4;4;4/4,5,b52eb44672033de81ec2213895507706,2024-07-31 00:14:32,0.5.1 diff --git a/benchmark/test_api_calling.py b/benchmark/test_api_calling.py index c198ebd1..a90c4e15 100644 --- a/benchmark/test_api_calling.py +++ b/benchmark/test_api_calling.py @@ -5,7 +5,11 @@ import pytest from biochatter._misc import ensure_iterable -from biochatter.api_agent import OncoKBQueryBuilder, BioToolsQueryBuilder +from biochatter.api_agent import ( + OncoKBQueryBuilder, + BioToolsQueryBuilder, + BrAPIQueryBuilder, +) from .conftest import calculate_bool_vector_score from .benchmark_utils import ( skip_if_already_run, @@ -36,6 +40,8 @@ def run_test(): builder = OncoKBQueryBuilder() elif "biotools" in yaml_data["case"]: builder = BioToolsQueryBuilder() + elif "brapi" in yaml_data["case"]: + builder = BrAPIQueryBuilder() parameters = builder.parameterise_query( question=yaml_data["input"]["prompt"], conversation=conversation, diff --git a/biochatter/api_agent/__init__.py b/biochatter/api_agent/__init__.py index 4d9084ab..74cf306e 100644 --- a/biochatter/api_agent/__init__.py +++ b/biochatter/api_agent/__init__.py @@ -11,6 +11,7 @@ BioToolsInterpreter, BioToolsQueryBuilder, ) +from .brapi import BrAPIQueryBuilder, BrAPIFetcher, BrAPIInterpreter from .api_agent import APIAgent __all__ = [ diff --git a/biochatter/api_agent/brapi.py b/biochatter/api_agent/brapi.py index be2c5c27..7bac58c3 100644 --- a/biochatter/api_agent/brapi.py +++ b/biochatter/api_agent/brapi.py @@ -12,7 +12,7 @@ from .abc import BaseFetcher, BaseInterpreter, BaseQueryBuilder BRAPI_QUERY_PROMPT = """ -You are a world class algorithm for creating queries in structured formats. Your task is to use the web API of Breeding API (BrAPI) to answer questions about . +You are a world class algorithm for creating queries in structured formats. Your task is to use the web API of Breeding API (BrAPI) to answer questions about plant germplasm or phenotyping studies. 
You have to extract the appropriate information out of the examples: 1. To list information about the tools, use the endpoint with parameters like . @@ -22,11 +22,24 @@ Base URL - +https://urgi.versailles.inrae.fr/faidare/brapi/v1/ Endpoints and Parameters - +1. Get germplasm + • GET /germplasm + • Parameters: + • accessionNumber The unique identifier for a material or germplasm within a genebank. MCPD (v2.1) (ACCENUMB) 2. This is the unique identifier for accessions within a genebank, and is assigned when a sample is entered into the genebank collection (e.g. "PI 113869"). + • collection A specific panel/collection/population name this germplasm belongs to. + • binomialName The full binomial name (scientific name) to identify a germplasm + • genus Genus name to identify germplasm + • species Species name to identify germplasm + • synonym Alternative name or ID used to reference this germplasm + • studyDbId Use this parameter to only return results associated with the given Study unique identifier. Use GET /studies to find the list of available Studies on a server. + • germplasmName Use this parameter to only return results associated with the given Germplasm by its human readable name. Use GET /germplasm to find the list of available Germplasm on a server. + • germplasmPUI Use this parameter to only return results associated with the given Germplasm by its global permanent unique identifier. Use GET /germplasm to find the list of available Germplasm on a server. + + """ @@ -55,6 +68,43 @@ class BrAPIQueryParameters(BaseModel): default_factory=lambda: str(uuid.uuid4()), description="Unique identifier for the question.", ) + # Optional filters for the GET /germplasm endpoint; each one defaults to None. + accessionNumber: str | None = Field( + default=None, + description="The unique identifier for a material or germplasm within a genebank. MCPD (v2.1) (ACCENUMB) 2. This is the unique identifier for accessions within a genebank, and is assigned when a sample is entered into the genebank collection (e.g. 
\"PI 113869\").", + ) + collection: str | None = Field( + default=None, + description="A specific panel/collection/population name this germplasm belongs to.", + ) + binomialName: str | None = Field( + default=None, + description="The full binomial name (scientific name) to identify a germplasm.", + ) + genus: str | None = Field( + default=None, + description="Genus name to identify germplasm.", + ) + species: str | None = Field( + default=None, + description="Species name to identify germplasm.", + ) + synonym: str | None = Field( + default=None, + description="Alternative name or ID used to reference this germplasm.", + ) + studyDbId: str | None = Field( + default=None, + description="Use this parameter to only return results associated with the given Study unique identifier. Use GET /studies to find the list of available Studies on a server.", + ) + germplasmName: str | None = Field( + default=None, + description="Use this parameter to only return results associated with the given Germplasm by its human readable name. Use GET /germplasm to find the list of available Germplasm on a server.", + ) + germplasmPUI: str | None = Field( + default=None, + description="Use this parameter to only return results associated with the given Germplasm by its global permanent unique identifier. Use GET /germplasm to find the list of available Germplasm on a server.", + ) class BrAPIQueryBuilder(BaseQueryBuilder):