From 72e52ee131cc6317f96eb2264728659d2c023bb9 Mon Sep 17 00:00:00 2001 From: lievan Date: Tue, 14 Jan 2025 11:26:22 -0500 Subject: [PATCH 1/2] wip rel note --- ddtrace/llmobs/_evaluators/runner.py | 2 +- ddtrace/llmobs/_evaluators/sampler.py | 4 +- .../ragas-integration-a81b696757c0e7a5.yaml | 65 +++++++++++++++++++ tests/llmobs/test_llmobs_evaluator_runner.py | 2 +- tests/llmobs/test_llmobs_ragas_evaluators.py | 2 +- tests/llmobs/test_llmobs_service.py | 4 +- 6 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml diff --git a/ddtrace/llmobs/_evaluators/runner.py b/ddtrace/llmobs/_evaluators/runner.py index 3d26998f1b4..f1583743c6a 100644 --- a/ddtrace/llmobs/_evaluators/runner.py +++ b/ddtrace/llmobs/_evaluators/runner.py @@ -41,7 +41,7 @@ def __init__(self, interval: float, llmobs_service=None, evaluators=None): if len(self.evaluators) > 0: return - evaluator_str = os.getenv("_DD_LLMOBS_EVALUATORS") + evaluator_str = os.getenv("DD_LLMOBS_EVALUATORS") if evaluator_str is None: return diff --git a/ddtrace/llmobs/_evaluators/sampler.py b/ddtrace/llmobs/_evaluators/sampler.py index 9dcb0759724..6b983d3a8dd 100644 --- a/ddtrace/llmobs/_evaluators/sampler.py +++ b/ddtrace/llmobs/_evaluators/sampler.py @@ -46,7 +46,7 @@ def __repr__(self): class EvaluatorRunnerSampler: - SAMPLING_RULES_ENV_VAR = "_DD_LLMOBS_EVALUATOR_SAMPLING_RULES" + SAMPLING_RULES_ENV_VAR = "DD_LLMOBS_EVALUATOR_SAMPLING_RULES" def __init__(self): self.rules = self.parse_rules() @@ -60,7 +60,7 @@ def sample(self, evaluator_label, span): def parse_rules(self) -> List[EvaluatorRunnerSamplingRule]: rules = [] sampling_rules_str = os.getenv(self.SAMPLING_RULES_ENV_VAR) - telemetry_writer.add_configuration("_DD_LLMOBS_EVALUATOR_SAMPLING_RULES", sampling_rules_str, origin="env") + telemetry_writer.add_configuration("DD_LLMOBS_EVALUATOR_SAMPLING_RULES", sampling_rules_str, origin="env") def parsing_failed_because(msg, maybe_throw_this): 
telemetry_writer.add_log( diff --git a/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml new file mode 100644 index 00000000000..47514409f25 --- /dev/null +++ b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml @@ -0,0 +1,65 @@ +--- +#instructions: +# The style guide below provides explanations, instructions, and templates to write your own release note. +# Once finished, all irrelevant sections (including this instruction section) should be removed, +# and the release note should be committed with the rest of the changes. +# +# The main goal of a release note is to provide a brief overview of a change and provide actionable steps to the user. +# The release note should clearly communicate what the change is, why the change was made, and how a user can migrate their code. +# +# The release note should also clearly distinguish between announcements and user instructions. Use: +# * Past tense for previous/existing behavior (ex: ``resulted, caused, failed``) +# * Third person present tense for the change itself (ex: ``adds, fixes, upgrades``) +# * Active present infinitive for user instructions (ex: ``set, use, add``) +# +# Release notes should: +# * Use plain language +# * Be concise +# * Include actionable steps with the necessary code changes +# * Include relevant links (bug issues, upstream issues or release notes, documentation pages) +# * Use full sentences with sentence-casing and punctuation. +# * Before using Datadog specific acronyms/terminology, a release note must first introduce them with a definition. +# +# Release notes should not: +# * Be vague. Example: ``fixes an issue in tracing``. +# * Use overly technical language +# * Use dynamic links (``stable/latest/1.x`` URLs). Instead, use static links (specific version, commit hash) whenever possible so that they don't break in the future. 
+prelude: > + Usually in tandem with a new feature or major change, meant to provide context or background for a major change. + No specific format other than a required scope is provided and the author is requested to use their best judgment. + Format: : . +features: + - | + For new features such as a new integration or component. Use present tense with the following format: + Format: : This introduces . +issues: + - | + For known issues. Use present tense with the following format: + Format: : There is a known issue with . + . +upgrade: + - | + For enhanced functionality or if package dependencies are upgraded. If applicable, include instructions + for how a user can migrate their code. + Use present tense with the following formats, respectively for enhancements or removals: + Format: : This upgrades . With this upgrade, you can . + - | + Format: : is now removed. As an alternative to , you can use instead. +deprecations: + - | + Warning of a component or member of the public API being removed in the future. + Use present tense for when deprecation actually happens and future tense for when removal is planned to happen. + Include deprecation/removal timeline, as well as workarounds and alternatives in the following format: + Format: : is deprecated and will be removed in . + As an alternative to , you can use instead. +fixes: + - | + For reporting bug fixes. + Use past tense for the problem and present tense for the fix and solution in the following format: + Format: : This fix resolves an issue where caused . +other: + - | + For any change which does not fall into any of the above categories. Since changes falling into this category are + likely rare and not very similar to each other, no specific format other than a required scope is provided. + The author is requested to use their best judgment to ensure a quality release note. + Format: : . 
diff --git a/tests/llmobs/test_llmobs_evaluator_runner.py b/tests/llmobs/test_llmobs_evaluator_runner.py index 96104bb19be..82f5448a358 100644 --- a/tests/llmobs/test_llmobs_evaluator_runner.py +++ b/tests/llmobs/test_llmobs_evaluator_runner.py @@ -92,7 +92,7 @@ def test_evaluator_runner_on_exit(mock_writer_logs, run_python_code_in_subproces def test_evaluator_runner_unsupported_evaluator(): - with override_env({"_DD_LLMOBS_EVALUATORS": "unsupported"}): + with override_env({"DD_LLMOBS_EVALUATORS": "unsupported"}): with pytest.raises(ValueError): EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock()) diff --git a/tests/llmobs/test_llmobs_ragas_evaluators.py b/tests/llmobs/test_llmobs_ragas_evaluators.py index 0e901fe93b4..aabc344fd46 100644 --- a/tests/llmobs/test_llmobs_ragas_evaluators.py +++ b/tests/llmobs/test_llmobs_ragas_evaluators.py @@ -209,7 +209,7 @@ def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit(mock_writer_log "PYTHONPATH": ":".join(pypath), "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "dummy-openai-api-key"), "_DD_LLMOBS_EVALUATOR_INTERVAL": "5", - "_DD_LLMOBS_EVALUATORS": "ragas_faithfulness", + "DD_LLMOBS_EVALUATORS": "ragas_faithfulness", "DD_TRACE_ENABLED": "0", } ) diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py index dad6accdcfb..c0eddc946ea 100644 --- a/tests/llmobs/test_llmobs_service.py +++ b/tests/llmobs/test_llmobs_service.py @@ -1388,7 +1388,7 @@ def test_llmobs_fork_recreates_and_restarts_eval_metric_writer(): def test_llmobs_fork_recreates_and_restarts_evaluator_runner(mock_ragas_evaluator): """Test that forking a process correctly recreates and restarts the EvaluatorRunner.""" - with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")): + with override_env(dict(DD_LLMOBS_EVALUATORS="ragas_faithfulness")): with mock.patch("ddtrace.llmobs._evaluators.runner.EvaluatorRunner.periodic"): llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app") original_pid = 
llmobs_service._instance.tracer._pid @@ -1757,7 +1757,7 @@ async def test_annotation_context_async_nested(llmobs): def test_service_enable_starts_evaluator_runner_when_evaluators_exist(): pytest.importorskip("ragas") with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): - with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")): + with override_env(dict(DD_LLMOBS_EVALUATORS="ragas_faithfulness")): dummy_tracer = DummyTracer() llmobs_service.enable(_tracer=dummy_tracer) llmobs_instance = llmobs_service._instance From f642a1c51155ec4d1026362d592be68560485ccc Mon Sep 17 00:00:00 2001 From: lievan Date: Tue, 14 Jan 2025 12:40:58 -0500 Subject: [PATCH 2/2] add rel note --- .../ragas-integration-a81b696757c0e7a5.yaml | 72 +++---------------- 1 file changed, 10 insertions(+), 62 deletions(-) diff --git a/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml index 47514409f25..dd51941f1fa 100644 --- a/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml +++ b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml @@ -1,65 +1,13 @@ --- -#instructions: -# The style guide below provides explanations, instructions, and templates to write your own release note. -# Once finished, all irrelevant sections (including this instruction section) should be removed, -# and the release note should be committed with the rest of the changes. -# -# The main goal of a release note is to provide a brief overview of a change and provide actionable steps to the user. -# The release note should clearly communicate what the change is, why the change was made, and how a user can migrate their code. -# -# The release note should also clearly distinguish between announcements and user instructions. 
Use: -# * Past tense for previous/existing behavior (ex: ``resulted, caused, failed``) -# * Third person present tense for the change itself (ex: ``adds, fixes, upgrades``) -# * Active present infinitive for user instructions (ex: ``set, use, add``) -# -# Release notes should: -# * Use plain language -# * Be concise -# * Include actionable steps with the necessary code changes -# * Include relevant links (bug issues, upstream issues or release notes, documentation pages) -# * Use full sentences with sentence-casing and punctuation. -# * Before using Datadog specific acronyms/terminology, a release note must first introduce them with a definition. -# -# Release notes should not: -# * Be vague. Example: ``fixes an issue in tracing``. -# * Use overly technical language -# * Use dynamic links (``stable/latest/1.x`` URLs). Instead, use static links (specific version, commit hash) whenever possible so that they don't break in the future. -prelude: > - Usually in tandem with a new feature or major change, meant to provide context or background for a major change. - No specific format other than a required scope is provided and the author is requested to use their best judgment. - Format: : . features: - | - For new features such as a new integration or component. Use present tense with the following format: - Format: : This introduces . -issues: - - | - For known issues. Use present tense with the following format: - Format: : There is a known issue with . - . -upgrade: - - | - For enhanced functionality or if package dependencies are upgraded. If applicable, include instructions - for how a user can migrate their code. - Use present tense with the following formats, respectively for enhancements or removals: - Format: : This upgrades . With this upgrade, you can . - - | - Format: : is now removed. As an alternative to , you can use instead. -deprecations: - - | - Warning of a component or member of the public API being removed in the future. 
- Use present tense for when deprecation actually happens and future tense for when removal is planned to happen. - Include deprecation/removal timeline, as well as workarounds and alternatives in the following format: - Format: : is deprecated and will be removed in . - As an alternative to , you can use instead. -fixes: - - | - For reporting bug fixes. - Use past tense for the problem and present tense for the fix and solution in the following format: - Format: : This fix resolves an issue where caused . -other: - - | - For any change which does not fall into any of the above categories. Since changes falling into this category are - likely rare and not very similar to each other, no specific format other than a required scope is provided. - The author is requested to use their best judgment to ensure a quality release note. - Format: : . + LLM Observability: This introduces an integration with the [RAGAS](https://docs.ragas.io/en/stable/) evaluation framework to continuously monitor + the performance of context-augmented LLM generations in production. + + The integration supports evaluating LLM inferences with the following RAGAS metrics: + - [Faithfulness](https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/faithfulness/): measures if the LLM response is faithful to the provided context. + - [Answer Relevancy](https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/answer_relevance/): measures how relevant the LLM response is to the user input. + - [Context Precision](https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/context_precision/): measures how effectively the context is used in the generated response. + + # (TODO): UPDATE TO CORRECT LINK LATER!!! + For more information, please see the [RAGAS Integration documentation](https://docs.datadoghq.com/llm_observability/submit_evaluations/ragas_integration).