From c406da8f9ecacb96b93982b8acaad2f5d12dab6e Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Mon, 18 Nov 2024 17:03:48 -0500 Subject: [PATCH 1/4] Bump to outlines-core=0.1.17 for python 3.12-3.13 support (#1273) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fa7005afd..1fd2897aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "pycountry", "airportsdata", "torch", - "outlines_core==0.1.14", + "outlines_core==0.1.17", ] dynamic = ["version"] From 2eab80c1ceabdf5ba340b921511f22c235e1c063 Mon Sep 17 00:00:00 2001 From: "Victoria Terenina (torymur)" Date: Thu, 21 Nov 2024 17:05:28 +0000 Subject: [PATCH 2/4] Turn off guide caching --- outlines/fsm/guide.py | 2 -- tests/generate/test_integration_llamacpp.py | 1 + tests/generate/test_integration_transformers.py | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/outlines/fsm/guide.py b/outlines/fsm/guide.py index d46228fe9..6b97d7729 100644 --- a/outlines/fsm/guide.py +++ b/outlines/fsm/guide.py @@ -15,7 +15,6 @@ ) from outlines import grammars -from outlines.caching import cache from outlines.fsm.parsing import PartialLark, PartialParserState if TYPE_CHECKING: @@ -73,7 +72,6 @@ def copy(self): return self -@cache() def cached_create_states_mapping(regex_string, tokenizer, *args, **kwargs): return uncached_create_states_mapping(regex_string, tokenizer, *args, **kwargs) diff --git a/tests/generate/test_integration_llamacpp.py b/tests/generate/test_integration_llamacpp.py index 8d4596d60..fd5be2171 100644 --- a/tests/generate/test_integration_llamacpp.py +++ b/tests/generate/test_integration_llamacpp.py @@ -274,6 +274,7 @@ def test_llama_cpp_pre_tokenizer_remains_broken(): generate.choice(model, ["skirt", "dress", "pen", "jacket"]) +@pytest.mark.skip("Caching for guide was temporarily turned off") def test_RegexGuide_caching(model, temp_cache_dir): import llama_cpp diff --git a/tests/generate/test_integration_transformers.py b/tests/generate/test_integration_transformers.py index 2462d9fcf..8acb87500 100644 --- a/tests/generate/test_integration_transformers.py +++ b/tests/generate/test_integration_transformers.py @@ -492,6 +492,7 @@ def test_transformers_use_existing_model_and_tokenizer(): assert isinstance(sequence, str) +@pytest.mark.skip("Caching for guide was temporarily turned off") def test_RegexGuide_caching(temp_cache_dir): import outlines.caching from outlines.fsm.guide import cached_create_states_mapping From f099f9668986340f1167a82c5e75fc1066c79594 Mon Sep 17 00:00:00 2001 From: "Victoria Terenina (torymur)" Date: Thu, 21 Nov 2024 19:34:22 +0000 Subject: [PATCH 3/4] Fix index interface in tests --- tests/fsm/test_guide.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tests/fsm/test_guide.py b/tests/fsm/test_guide.py index 510faf4b0..bf25c43c4 100644 --- a/tests/fsm/test_guide.py +++ b/tests/fsm/test_guide.py @@ -59,7 +59,7 @@ def convert_token_to_string(self, token): tokenizer = MockTokenizer() fsm = RegexGuide.from_regex(regex_str, tokenizer) - assert fsm.states_to_token_maps == {0: {1: 1}} + assert fsm.states_to_token_maps.get_transitions() == {0: {1: 1}} instruction = fsm.get_next_instruction(0) assert isinstance(instruction, Generate) @@ -70,9 +70,6 @@ def convert_token_to_string(self, token): assert fsm.is_final_state(0) is False - for state in fsm.final_states: - assert fsm.is_final_state(state) is True - def test_regex_multi_byte_llama_like(): class MockTokenizer: @@ -100,7 +97,7 @@ def convert_token_to_string(self, token): tokenizer = MockTokenizer() fsm = RegexGuide.from_regex(regex_str, tokenizer) - assert fsm.states_to_token_maps == { + assert fsm.states_to_token_maps.get_transitions() == { 0: {5: 1, 4: 2}, 1: {6: 3}, 3: {7: 4}, @@ -116,9 +113,6 @@ def convert_token_to_string(self, token): assert fsm.is_final_state(0) is False - for state in fsm.final_states: - assert fsm.is_final_state(state) is True - def test_regex_multi_byte_gpt2_like(): class MockTokenizer: @@ -147,7 +141,7 @@ def convert_token_to_string(self, token): tokenizer = MockTokenizer() fsm = RegexGuide.from_regex(regex_str, tokenizer) - assert fsm.states_to_token_maps == { + assert fsm.states_to_token_maps.get_transitions() == { 0: {5: 1, 10: 2}, 1: {8: 5, 4: 3}, 2: {11: 3}, @@ -163,9 +157,6 @@ def convert_token_to_string(self, token): assert fsm.is_final_state(0) is False - for state in fsm.final_states: - assert fsm.is_final_state(state) is True - def test_regex_final_state(): """Make sure that the FSM stays in the final state as we keep generating""" From b55d31463cb6ed38fc0109e018f53ce0cdafbe19 Mon Sep 17 00:00:00 2001 From: Jeremy Zucker Date: Wed, 6 Nov 2024 16:25:29 -0800 Subject: [PATCH 4/4] Update generation.md `[Outlines model](../models)` does not return the link correctly. Tried switching to `[Outlines model](../models/models.md)` --- docs/reference/generation/generation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/generation/generation.md b/docs/reference/generation/generation.md index a14818514..930ad9d22 100644 --- a/docs/reference/generation/generation.md +++ b/docs/reference/generation/generation.md @@ -4,7 +4,7 @@ title: Generation # Generation -Once an [Outlines model](../models) is constructed you can use `outlines.generate` to generate text. Standard LLM generation is possible via `outlines.generate.text`, along with a variety of structured generation methods described below. (For a detailed technical explanation of how structured generation works, you may review the [Structured Generation Explanation](./structured_generation_explanation.md) page) +Once an [Outlines model](../models/models.md) is constructed you can use `outlines.generate` to generate text. Standard LLM generation is possible via `outlines.generate.text`, along with a variety of structured generation methods described below. (For a detailed technical explanation of how structured generation works, you may review the [Structured Generation Explanation](./structured_generation_explanation.md) page) Before generating text, you must construct an `outlines.model`. Example: