From 72e52ee131cc6317f96eb2264728659d2c023bb9 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 14 Jan 2025 11:26:22 -0500
Subject: [PATCH 1/2] wip rel note

---
 ddtrace/llmobs/_evaluators/runner.py          |  2 +-
 ddtrace/llmobs/_evaluators/sampler.py         |  4 +-
 .../ragas-integration-a81b696757c0e7a5.yaml   | 65 +++++++++++++++++++
 tests/llmobs/test_llmobs_evaluator_runner.py  |  2 +-
 tests/llmobs/test_llmobs_ragas_evaluators.py  |  2 +-
 tests/llmobs/test_llmobs_service.py           |  4 +-
 6 files changed, 72 insertions(+), 7 deletions(-)
 create mode 100644 releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml

diff --git a/ddtrace/llmobs/_evaluators/runner.py b/ddtrace/llmobs/_evaluators/runner.py
index 3d26998f1b4..f1583743c6a 100644
--- a/ddtrace/llmobs/_evaluators/runner.py
+++ b/ddtrace/llmobs/_evaluators/runner.py
@@ -41,7 +41,7 @@ def __init__(self, interval: float, llmobs_service=None, evaluators=None):
         if len(self.evaluators) > 0:
             return
 
-        evaluator_str = os.getenv("_DD_LLMOBS_EVALUATORS")
+        evaluator_str = os.getenv("DD_LLMOBS_EVALUATORS")
         if evaluator_str is None:
             return
 
diff --git a/ddtrace/llmobs/_evaluators/sampler.py b/ddtrace/llmobs/_evaluators/sampler.py
index 9dcb0759724..6b983d3a8dd 100644
--- a/ddtrace/llmobs/_evaluators/sampler.py
+++ b/ddtrace/llmobs/_evaluators/sampler.py
@@ -46,7 +46,7 @@ def __repr__(self):
 
 
 class EvaluatorRunnerSampler:
-    SAMPLING_RULES_ENV_VAR = "_DD_LLMOBS_EVALUATOR_SAMPLING_RULES"
+    SAMPLING_RULES_ENV_VAR = "DD_LLMOBS_EVALUATOR_SAMPLING_RULES"
 
     def __init__(self):
         self.rules = self.parse_rules()
@@ -60,7 +60,7 @@ def sample(self, evaluator_label, span):
     def parse_rules(self) -> List[EvaluatorRunnerSamplingRule]:
         rules = []
         sampling_rules_str = os.getenv(self.SAMPLING_RULES_ENV_VAR)
-        telemetry_writer.add_configuration("_DD_LLMOBS_EVALUATOR_SAMPLING_RULES", sampling_rules_str, origin="env")
+        telemetry_writer.add_configuration("DD_LLMOBS_EVALUATOR_SAMPLING_RULES", sampling_rules_str, origin="env")
 
         def parsing_failed_because(msg, maybe_throw_this):
             telemetry_writer.add_log(
diff --git a/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml
new file mode 100644
index 00000000000..47514409f25
--- /dev/null
+++ b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml
@@ -0,0 +1,65 @@
+---
+#instructions:
+#    The style guide below provides explanations, instructions, and templates to write your own release note.
+#    Once finished, all irrelevant sections (including this instruction section) should be removed,
+#    and the release note should be committed with the rest of the changes.
+#
+#    The main goal of a release note is to provide a brief overview of a change and provide actionable steps to the user.
+#    The release note should clearly communicate what the change is, why the change was made, and how a user can migrate their code.
+#
+#    The release note should also clearly distinguish between announcements and user instructions. Use:
+#    * Past tense for previous/existing behavior (ex: ``resulted, caused, failed``)
+#    * Third person present tense for the change itself (ex: ``adds, fixes, upgrades``)
+#    * Active present infinitive for user instructions (ex: ``set, use, add``)
+#
+#    Release notes should:
+#    * Use plain language
+#    * Be concise
+#    * Include actionable steps with the necessary code changes
+#    * Include relevant links (bug issues, upstream issues or release notes, documentation pages)
+#    * Use full sentences with sentence-casing and punctuation.
+#    * Before using Datadog specific acronyms/terminology, a release note must first introduce them with a definition.
+#
+#    Release notes should not:
+#    * Be vague. Example: ``fixes an issue in tracing``.
+#    * Use overly technical language
+#    * Use dynamic links (``stable/latest/1.x`` URLs). Instead, use static links (specific version, commit hash) whenever possible so that they don't break in the future.
+prelude: >
+    Usually in tandem with a new feature or major change, meant to provide context or background for a major change. 
+    No specific format other than a required scope is provided and the author is requested to use their best judgment.
+    Format: <scope>: <add_prelude_and_context_here>.
+features:
+  - |
+    For new features such as a new integration or component. Use present tense with the following format:
+    Format: <scope>: This introduces <new_feature_or_component>.
+issues:
+  - |
+    For known issues. Use present tense with the following format:
+    Format: <scope>: There is a known <symptom_of_issue> issue with <affected_code>.
+    <provide_actionable_workaround_here>.
+upgrade:
+  - |
+    For enhanced functionality or if package dependencies are upgraded. If applicable, include instructions
+    for how a user can migrate their code.
+    Use present tense with the following formats, respectively for enhancements or removals:
+    Format: <scope>: This upgrades <present_tense_explanation>. With this upgrade, you can <actionable_step_for_user>.
+  - |
+    Format: <scope>: <affected_code> is now removed. As an alternative to <affected_code>, you can use <alternative> instead.
+deprecations:
+  - |
+    Warning of a component or member of the public API being removed in the future.
+    Use present tense for when deprecation actually happens and future tense for when removal is planned to happen. 
+    Include deprecation/removal timeline, as well as workarounds and alternatives in the following format:
+    Format: <scope>: <affected_code> is deprecated and will be removed in <version_to_be_removed>.
+            As an alternative to <affected_code>, you can use <alternative> instead.
+fixes:
+  - |
+    For reporting bug fixes.
+    Use past tense for the problem and present tense for the fix and solution in the following format:
+    Format: <scope>: This fix resolves an issue where <ABC_bug> caused <XYZ_situation>.
+other:
+  - |
+    For any change which does not fall into any of the above categories. Since changes falling into this category are 
+    likely rare and not very similar to each other, no specific format other than a required scope is provided. 
+    The author is requested to use their best judgment to ensure a quality release note.
+    Format: <scope>: <add_release_note_here>.
diff --git a/tests/llmobs/test_llmobs_evaluator_runner.py b/tests/llmobs/test_llmobs_evaluator_runner.py
index 96104bb19be..82f5448a358 100644
--- a/tests/llmobs/test_llmobs_evaluator_runner.py
+++ b/tests/llmobs/test_llmobs_evaluator_runner.py
@@ -92,7 +92,7 @@ def test_evaluator_runner_on_exit(mock_writer_logs, run_python_code_in_subproces
 
 
 def test_evaluator_runner_unsupported_evaluator():
-    with override_env({"_DD_LLMOBS_EVALUATORS": "unsupported"}):
+    with override_env({"DD_LLMOBS_EVALUATORS": "unsupported"}):
         with pytest.raises(ValueError):
             EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock())
 
diff --git a/tests/llmobs/test_llmobs_ragas_evaluators.py b/tests/llmobs/test_llmobs_ragas_evaluators.py
index 0e901fe93b4..aabc344fd46 100644
--- a/tests/llmobs/test_llmobs_ragas_evaluators.py
+++ b/tests/llmobs/test_llmobs_ragas_evaluators.py
@@ -209,7 +209,7 @@ def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit(mock_writer_log
             "PYTHONPATH": ":".join(pypath),
             "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "dummy-openai-api-key"),
             "_DD_LLMOBS_EVALUATOR_INTERVAL": "5",
-            "_DD_LLMOBS_EVALUATORS": "ragas_faithfulness",
+            "DD_LLMOBS_EVALUATORS": "ragas_faithfulness",
             "DD_TRACE_ENABLED": "0",
         }
     )
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py
index dad6accdcfb..c0eddc946ea 100644
--- a/tests/llmobs/test_llmobs_service.py
+++ b/tests/llmobs/test_llmobs_service.py
@@ -1388,7 +1388,7 @@ def test_llmobs_fork_recreates_and_restarts_eval_metric_writer():
 
 def test_llmobs_fork_recreates_and_restarts_evaluator_runner(mock_ragas_evaluator):
     """Test that forking a process correctly recreates and restarts the EvaluatorRunner."""
-    with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")):
+    with override_env(dict(DD_LLMOBS_EVALUATORS="ragas_faithfulness")):
         with mock.patch("ddtrace.llmobs._evaluators.runner.EvaluatorRunner.periodic"):
             llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app")
             original_pid = llmobs_service._instance.tracer._pid
@@ -1757,7 +1757,7 @@ async def test_annotation_context_async_nested(llmobs):
 def test_service_enable_starts_evaluator_runner_when_evaluators_exist():
     pytest.importorskip("ragas")
     with override_global_config(dict(_dd_api_key="<not-a-real-api-key>", _llmobs_ml_app="<ml-app-name>")):
-        with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")):
+        with override_env(dict(DD_LLMOBS_EVALUATORS="ragas_faithfulness")):
             dummy_tracer = DummyTracer()
             llmobs_service.enable(_tracer=dummy_tracer)
             llmobs_instance = llmobs_service._instance

From f642a1c51155ec4d1026362d592be68560485ccc Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 14 Jan 2025 12:40:58 -0500
Subject: [PATCH 2/2] add rel note

---
 .../ragas-integration-a81b696757c0e7a5.yaml   | 72 +++----------------
 1 file changed, 10 insertions(+), 62 deletions(-)

diff --git a/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml
index 47514409f25..dd51941f1fa 100644
--- a/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml
+++ b/releasenotes/notes/ragas-integration-a81b696757c0e7a5.yaml
@@ -1,65 +1,13 @@
 ---
-#instructions:
-#    The style guide below provides explanations, instructions, and templates to write your own release note.
-#    Once finished, all irrelevant sections (including this instruction section) should be removed,
-#    and the release note should be committed with the rest of the changes.
-#
-#    The main goal of a release note is to provide a brief overview of a change and provide actionable steps to the user.
-#    The release note should clearly communicate what the change is, why the change was made, and how a user can migrate their code.
-#
-#    The release note should also clearly distinguish between announcements and user instructions. Use:
-#    * Past tense for previous/existing behavior (ex: ``resulted, caused, failed``)
-#    * Third person present tense for the change itself (ex: ``adds, fixes, upgrades``)
-#    * Active present infinitive for user instructions (ex: ``set, use, add``)
-#
-#    Release notes should:
-#    * Use plain language
-#    * Be concise
-#    * Include actionable steps with the necessary code changes
-#    * Include relevant links (bug issues, upstream issues or release notes, documentation pages)
-#    * Use full sentences with sentence-casing and punctuation.
-#    * Before using Datadog specific acronyms/terminology, a release note must first introduce them with a definition.
-#
-#    Release notes should not:
-#    * Be vague. Example: ``fixes an issue in tracing``.
-#    * Use overly technical language
-#    * Use dynamic links (``stable/latest/1.x`` URLs). Instead, use static links (specific version, commit hash) whenever possible so that they don't break in the future.
-prelude: >
-    Usually in tandem with a new feature or major change, meant to provide context or background for a major change. 
-    No specific format other than a required scope is provided and the author is requested to use their best judgment.
-    Format: <scope>: <add_prelude_and_context_here>.
 features:
   - |
-    For new features such as a new integration or component. Use present tense with the following format:
-    Format: <scope>: This introduces <new_feature_or_component>.
-issues:
-  - |
-    For known issues. Use present tense with the following format:
-    Format: <scope>: There is a known <symptom_of_issue> issue with <affected_code>.
-    <provide_actionable_workaround_here>.
-upgrade:
-  - |
-    For enhanced functionality or if package dependencies are upgraded. If applicable, include instructions
-    for how a user can migrate their code.
-    Use present tense with the following formats, respectively for enhancements or removals:
-    Format: <scope>: This upgrades <present_tense_explanation>. With this upgrade, you can <actionable_step_for_user>.
-  - |
-    Format: <scope>: <affected_code> is now removed. As an alternative to <affected_code>, you can use <alternative> instead.
-deprecations:
-  - |
-    Warning of a component or member of the public API being removed in the future.
-    Use present tense for when deprecation actually happens and future tense for when removal is planned to happen. 
-    Include deprecation/removal timeline, as well as workarounds and alternatives in the following format:
-    Format: <scope>: <affected_code> is deprecated and will be removed in <version_to_be_removed>.
-            As an alternative to <affected_code>, you can use <alternative> instead.
-fixes:
-  - |
-    For reporting bug fixes.
-    Use past tense for the problem and present tense for the fix and solution in the following format:
-    Format: <scope>: This fix resolves an issue where <ABC_bug> caused <XYZ_situation>.
-other:
-  - |
-    For any change which does not fall into any of the above categories. Since changes falling into this category are 
-    likely rare and not very similar to each other, no specific format other than a required scope is provided. 
-    The author is requested to use their best judgment to ensure a quality release note.
-    Format: <scope>: <add_release_note_here>.
+    LLM Observability: This introduces an integration with the `RAGAS <https://docs.ragas.io/en/stable/>`_ evaluation framework to continuously monitor
+                      the performance of context-augmented LLM generations in production.
+
+                      The integration supports evaluating LLM inferences with the following RAGAS metrics:
+                      - `Faithfulness <https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/faithfulness/>`_: measures if the LLM response is faithful to the provided context.
+                      - `Answer Relevancy <https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/answer_relevance/>`_: measures how relevant the LLM response is to the user input.
+                      - `Context Precision <https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/context_precision/>`_: measures how effectively the context is used in the generated response.
+
+                      # TODO: Update to the correct link later.
+                      For more information, please see the `RAGAS Integration documentation <https://docs.datadoghq.com/llm_observability/submit_evaluations/ragas_integration>`_.