Skip to content

Commit

Permalink
Do not automatically cache models in temp dirs (#462)
Browse files Browse the repository at this point in the history
* Do not automatically cache models in temp dirs

* Fix for Python 3.8

Path.is_relative_to() was added in Python 3.9

* Disable speedup test for CausalLM with pkv

The speedup is small on the GitHub Actions runner hardware, so this test regularly
fails even with a speedup threshold of only 1.1

* Copy ov_config for seq2seq models
  • Loading branch information
helena-intel authored Oct 31, 2023
1 parent 87da8b0 commit 5320512
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 47 deletions.
8 changes: 4 additions & 4 deletions optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import logging
import os
from pathlib import Path
from tempfile import TemporaryDirectory
from tempfile import TemporaryDirectory, gettempdir
from typing import Dict, Optional, Union

import openvino
Expand Down Expand Up @@ -339,11 +339,11 @@ def compile(self):
if self.request is None:
logger.info(f"Compiling the model to {self._device} ...")
ov_config = {**self.ov_config}
if "CACHE_DIR" not in self.ov_config.keys():
# Set default CACHE_DIR only if it is not set.
if "CACHE_DIR" not in self.ov_config.keys() and not str(self.model_save_dir).startswith(gettempdir()):
# Set default CACHE_DIR only if it is not set, and if the model is not in a temporary directory
cache_dir = Path(self.model_save_dir).joinpath("model_cache")
ov_config["CACHE_DIR"] = str(cache_dir)
logger.info(f"Set CACHE_DIR to {str(cache_dir)}")
logger.info(f"Setting OpenVINO CACHE_DIR to {str(cache_dir)}")
self.request = core.compile_model(self.model, self._device, ov_config)

def _reshape(
Expand Down
8 changes: 3 additions & 5 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import os
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from tempfile import TemporaryDirectory, gettempdir
from typing import Any, Dict, List, Optional, Union

import numpy as np
Expand Down Expand Up @@ -539,10 +539,8 @@ def __init__(
self._model_dir = Path(model_dir or parent_model._model_save_dir)
config_path = self._model_dir / model_name / self.CONFIG_NAME
self.config = self.parent_model._dict_from_json_file(config_path) if config_path.is_file() else {}

# TODO : disable if self._model_dir tmp directory
if "CACHE_DIR" not in self.ov_config:
self.ov_config["CACHE_DIR"] = os.path.join(self._model_dir, self._model_name)
if "CACHE_DIR" not in self.ov_config.keys() and not str(self._model_dir).startswith(gettempdir()):
self.ov_config["CACHE_DIR"] = os.path.join(self._model_dir, self._model_name, "model_cache")

def _compile(self):
if self.request is None:
Expand Down
38 changes: 20 additions & 18 deletions optimum/intel/openvino/modeling_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import logging
from pathlib import Path
from tempfile import gettempdir
from typing import Dict, Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -202,31 +203,32 @@ def __init__(
self.decoder_with_past = None
enable_compilation = kwargs.get("compile", True)
encoder_cache_dir = Path(self.model_save_dir).joinpath("encoder_cache")
encoder_cache_dir.mkdir(parents=True, exist_ok=True)
ov_encoder_config = (
{**self.ov_config}
if "CACHE_DIR" in self.ov_config.keys()
else {**self.ov_config, "CACHE_DIR": str(encoder_cache_dir)}
)
ov_encoder_config = {**self.ov_config}

if "CACHE_DIR" not in ov_encoder_config.keys() and not str(self.model_save_dir).startswith(gettempdir()):
ov_encoder_config["CACHE_DIR"] = str(encoder_cache_dir)

self.encoder = OVEncoder(
self.encoder_model, self._device, ov_encoder_config, main_input_name=self.main_input_name
)

decoder_cache_dir = Path(self.model_save_dir).joinpath("decoder_cache")
decoder_cache_dir.mkdir(parents=True, exist_ok=True)
ov_decoder_config = (
{**self.ov_config}
if "CACHE_DIR" in self.ov_config.keys()
else {**self.ov_config, "CACHE_DIR": str(decoder_cache_dir)}
)
ov_decoder_config = {**self.ov_config}

if "CACHE_DIR" not in ov_decoder_config.keys() and not str(self.model_save_dir).startswith(gettempdir()):
ov_decoder_config["CACHE_DIR"] = str(decoder_cache_dir)

self.decoder = OVDecoder(self.decoder_model, self._device, ov_decoder_config)

if self.use_cache:
decoder_past_cache_dir = Path(self.model_save_dir).joinpath("decoder_past_cache")
decoder_past_cache_dir.mkdir(parents=True, exist_ok=True)
ov_decoder_past_config = (
{**self.ov_config}
if "CACHE_DIR" in self.ov_config.keys()
else {**self.ov_config, "CACHE_DIR": str(decoder_past_cache_dir)}
)
ov_decoder_past_config = {**self.ov_config}

if "CACHE_DIR" not in ov_decoder_past_config.keys() and not str(self.model_save_dir).startswith(
gettempdir()
):
ov_decoder_past_config["CACHE_DIR"] = str(decoder_past_cache_dir)

self.decoder_with_past = OVDecoder(self.decoder_with_past_model, self._device, ov_decoder_past_config)
if enable_compilation:
self.compile()
Expand Down
45 changes: 25 additions & 20 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,19 @@ def test_load_from_hub_and_save_model(self):
self.assertIsInstance(loaded_model.config, PretrainedConfig)
loaded_model_outputs = loaded_model(**tokens)

# Test that model caching is automatically enabled
openvino_cache_dir = loaded_model.model_save_dir / "model_cache"
self.assertTrue(openvino_cache_dir.is_dir())
self.assertGreaterEqual(len(list(openvino_cache_dir.glob("*.blob"))), 1)

# Test specifying ov_config with throughput hint and manual cache dir
manual_openvino_cache_dir = loaded_model.model_save_dir / "manual_model_cache"
ov_config = {"CACHE_DIR": str(manual_openvino_cache_dir), "PERFORMANCE_HINT": "THROUGHPUT"}
loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID, ov_config=ov_config)
self.assertTrue(manual_openvino_cache_dir.is_dir())
self.assertGreaterEqual(len(list(manual_openvino_cache_dir.glob("*.blob"))), 1)
self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT").name, "THROUGHPUT")

with tempfile.TemporaryDirectory() as tmpdirname:
loaded_model.save_pretrained(tmpdirname)
folder_contents = os.listdir(tmpdirname)
Expand All @@ -120,6 +133,7 @@ def test_load_from_hub_and_save_model(self):

outputs = model(**tokens)
self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))

del loaded_model
del model
gc.collect()
Expand Down Expand Up @@ -276,6 +290,10 @@ def test_pipeline(self, model_arch):
self.assertTrue(not model.is_dynamic)
self.assertGreaterEqual(outputs[0]["score"], 0.0)
self.assertIsInstance(outputs[0]["label"], str)
# Test that model caching was not automatically enabled for exported model
openvino_cache_dir = model.model_save_dir / "model_cache"
self.assertFalse(openvino_cache_dir.is_dir())

del model
del pipe
gc.collect()
Expand Down Expand Up @@ -466,7 +484,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
"pegasus",
)
GENERATION_LENGTH = 100
SPEEDUP_CACHE = 1.1

@parameterized.expand(SUPPORTED_ARCHITECTURES)
def test_compare_to_transformers(self, model_arch):
Expand Down Expand Up @@ -539,29 +556,17 @@ def test_compare_with_and_without_past_key_values(self):
tokens = tokenizer("This is a sample input", return_tensors="pt")

model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
# Warmup
_ = model_with_pkv.generate(**tokens)
with Timer() as with_pkv_timer:
outputs_model_with_pkv = model_with_pkv.generate(
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
)

outputs_model_with_pkv = model_with_pkv.generate(
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
)
model_without_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=False)

# Warmup
_ = model_without_pkv.generate(**tokens)
with Timer() as without_pkv_timer:
outputs_model_without_pkv = model_without_pkv.generate(
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
)
outputs_model_without_pkv = model_without_pkv.generate(
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
)
self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
self.assertTrue(
without_pkv_timer.elapsed / with_pkv_timer.elapsed > self.SPEEDUP_CACHE,
f"With pkv latency: {with_pkv_timer.elapsed:.3f} ms, without pkv latency: {without_pkv_timer.elapsed:.3f} ms,"
f" speedup: {without_pkv_timer.elapsed / with_pkv_timer.elapsed:.3f}",
)

del model_with_pkv
del model_without_pkv
gc.collect()
Expand Down

0 comments on commit 5320512

Please sign in to comment.