Skip to content

Commit

Permalink
A wrapper that allows limiting the number of calls made by an LLM bef…
Browse files Browse the repository at this point in the history
…ore it turns into a noop model

PiperOrigin-RevId: 676039260
Change-Id: Ie0810f791d189b3fd8007638822eafa08e3a368a
  • Loading branch information
vezhnick authored and copybara-github committed Sep 18, 2024
1 parent dd30b6a commit 5ed21fe
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 1 deletion.
92 changes: 92 additions & 0 deletions concordia/language_model/call_limit_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2023 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Wrapper to limit calls to an underlying language model."""

from collections.abc import Collection, Mapping, Sequence
from typing import Any

from absl import logging

from concordia.language_model import language_model
from typing_extensions import override


class CallLimitLanguageModel(language_model.LanguageModel):
  """Delegates to a wrapped language model until a call budget is spent.

  While budget remains, every request is forwarded to the wrapped model
  and counted. Once the budget is exhausted, `sample_text` produces an
  empty string and `sample_choice` produces the first of the offered
  responses, turning the wrapper into an effective noop model.
  """

  def __init__(
      self,
      model: language_model.LanguageModel,
      max_calls: int = 1000,
  ) -> None:
    """Initializes the call-limited wrapper.

    Args:
      model: The language model whose calls should be budgeted.
      max_calls: Maximum number of calls forwarded to the wrapped model.
    """
    self._model = model
    self._max_calls = max_calls
    self._calls = 0

  @override
  def sample_text(
      self,
      prompt: str,
      *,
      max_tokens: int = language_model.DEFAULT_MAX_TOKENS,
      terminators: Collection[str] = language_model.DEFAULT_TERMINATORS,
      temperature: float = language_model.DEFAULT_TEMPERATURE,
      timeout: float = language_model.DEFAULT_TIMEOUT_SECONDS,
      seed: int | None = None,
  ) -> str:
    # Forward while budget remains; only forwarded calls are counted.
    if self._calls < self._max_calls:
      self._calls += 1
      return self._model.sample_text(
          prompt,
          max_tokens=max_tokens,
          terminators=terminators,
          temperature=temperature,
          timeout=timeout,
          seed=seed,
      )
    # Budget spent: act as a noop model and return empty text.
    logging.log_first_n(
        logging.ERROR, 'Call limit reached', n=self._max_calls
    )
    return ''

  @override
  def sample_choice(
      self,
      prompt: str,
      responses: Sequence[str],
      *,
      seed: int | None = None,
  ) -> tuple[int, str, Mapping[str, Any]]:
    # Forward while budget remains; only forwarded calls are counted.
    if self._calls < self._max_calls:
      self._calls += 1
      return self._model.sample_choice(prompt, responses, seed=seed)
    # Budget spent: deterministically pick the first offered response.
    logging.log_first_n(
        logging.ERROR, 'Call limit reached', n=self._max_calls
    )
    return 0, responses[0], {}
2 changes: 1 addition & 1 deletion concordia/language_model/no_language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ def sample_choice(
*,
seed: int | None = None,
) -> tuple[int, str, Mapping[str, Any]]:
return 0, 'yes', {}
return 0, responses[0], {}

0 comments on commit 5ed21fe

Please sign in to comment.