From b8cb7599bf8ed631c414b9ef0452d5003bd01be9 Mon Sep 17 00:00:00 2001
From: Meet Doshi
Date: Wed, 23 Oct 2024 09:50:29 +0530
Subject: [PATCH] Replacing assertions (#98)

* Replacing assertions

Signed-off-by: quic-meet

* Changes

Signed-off-by: Meet Doshi

* misisng device ids param

Signed-off-by: Meet Doshi

* changes

Signed-off-by: Meet Doshi

* changes

Signed-off-by: Meet Doshi

---------

Signed-off-by: quic-meet
Signed-off-by: Meet Doshi
Co-authored-by: Meet Doshi
Co-authored-by: Amit Raj <168538872+quic-amitraj@users.noreply.github.com>
---
 QEfficient/base/common.py                    | 12 +++++------
 QEfficient/compile/compile_helper.py         |  8 +++----
 .../exporter/export_hf_to_cloud_ai_100.py    |  9 +++++---
 QEfficient/generation/cloud_infer.py         | 21 ++++++++++++-------
 .../generation/text_generation_inference.py  |  8 +++----
 .../transformers/models/modeling_auto.py     |  7 ++++---
 QEfficient/transformers/transform.py         |  8 +++----
 QEfficient/utils/_utils.py                   |  3 ++-
 8 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/QEfficient/base/common.py b/QEfficient/base/common.py
index 866c62184..d41ac59b1 100644
--- a/QEfficient/base/common.py
+++ b/QEfficient/base/common.py
@@ -46,9 +46,10 @@ def get_hf_model_type(hf_model_path: str) -> QEFF_MODEL_TYPE:
     """
     Loads model config file and returns the type of the model (i.e. LLMs, SD, quantized etc.) as supported by the library.
     """
-    assert os.path.isdir(
-        hf_model_path
-    ), "Pleae pass local dir path where the model is downloaded; use `QEfficient.utils.login_and_download_hf_lm` for downloading hf model"
+    if not os.path.isdir(hf_model_path):
+        raise FileNotFoundError(
+            "Please pass local dir path where the model is downloaded; use `QEfficient.utils.login_and_download_hf_lm` for downloading hf model"
+        )
     config, kwargs = AutoConfig.from_pretrained(
         hf_model_path,
         return_unused_kwargs=True,
@@ -84,9 +85,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
         pretrained_model_name_or_path = login_and_download_hf_lm(pretrained_model_name_or_path, *args, **kwargs)
         model_type = get_hf_model_type(hf_model_path=pretrained_model_name_or_path)
         qeff_auto_model_class = MODEL_TYPE_TO_QEFF_AUTO_MODEL_MAP[model_type]
-        assert issubclass(
-            qeff_auto_model_class, QEFFBaseModel
-        ), f"Expected class that inherits {QEFFBaseModel}, got {type(qeff_auto_model_class)}"
+        if not issubclass(qeff_auto_model_class, QEFFBaseModel):
+            raise Exception(f"Expected class that inherits {QEFFBaseModel}, got {type(qeff_auto_model_class)}")
 
         return qeff_auto_model_class.from_pretrained(
             pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs
diff --git a/QEfficient/compile/compile_helper.py b/QEfficient/compile/compile_helper.py
index f4882efc0..980c502a7 100644
--- a/QEfficient/compile/compile_helper.py
+++ b/QEfficient/compile/compile_helper.py
@@ -59,10 +59,10 @@ def compile_kv_model_on_cloud_ai_100(
     if os.path.isdir(aic_binary_dir):
         shutil.rmtree(aic_binary_dir)
 
-    assert os.path.isfile(
-        specializations_json
-    ), f"Please use 'QEfficient.compile', as {specializations_json} file was not found"
-    assert os.path.isfile(custom_io_path), f"{custom_io_path} file was not found!"
+    if not os.path.isfile(specializations_json):
+        raise FileNotFoundError(f"Please use 'QEfficient.compile', as {specializations_json} file was not found")
+    if not os.path.isfile(custom_io_path):
+        raise FileNotFoundError(f"{custom_io_path} file was not found!")
     command = [
         "/opt/qti-aic/exec/qaic-exec",
         f"-m={onnx_path}",
diff --git a/QEfficient/exporter/export_hf_to_cloud_ai_100.py b/QEfficient/exporter/export_hf_to_cloud_ai_100.py
index 706d14107..d533de1f4 100644
--- a/QEfficient/exporter/export_hf_to_cloud_ai_100.py
+++ b/QEfficient/exporter/export_hf_to_cloud_ai_100.py
@@ -177,7 +177,8 @@ def convert_to_cloud_kvstyle(
         logger.warning(f"Overriding {onnx_dir_path}")
         shutil.rmtree(onnx_dir_path)
 
-    assert qeff_model.is_transformed, f"please pass the {qeff_model.__class__.__name__} after transform API"
+    if not qeff_model.is_transformed:
+        raise Exception(f"please pass the {qeff_model.__class__.__name__} after transform API")
 
     # Decide path for saving exported ONNX files.
     model_name = export_kvstyle_transformed_model_to_onnx(
@@ -220,8 +221,10 @@ def export_kvstyle_transformed_model_to_onnx(
     output_names = list(pt_outputs.keys())
 
     # Raise error if expected outputs are not present
-    assert "logits" in output_names, "logits not found in output"
-    assert "past_key_values" in output_names, "past_key_values not found in output"
+    if "logits" not in output_names:
+        raise KeyError("logits not found in output")
+    if "past_key_values" not in output_names:
+        raise KeyError("past_key_values not found in output")
 
     # Build inputs for next iteration from outputs
     # Build inputs for decode
diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py
index 998dc82b2..a90804da6 100644
--- a/QEfficient/generation/cloud_infer.py
+++ b/QEfficient/generation/cloud_infer.py
@@ -66,14 +66,15 @@ def __init__(
         self.context = qaicrt.Context()
         self.queue = qaicrt.Queue(self.context, 0)  # Async API
         if enable_debug_logs:
-            assert (
-                self.context.setLogLevel(qaicrt.QLogLevel.QL_DEBUG) == qaicrt.QStatus.QS_SUCCESS
-            ), "Failed to setLogLevel"
+            if self.context.setLogLevel(qaicrt.QLogLevel.QL_DEBUG) != qaicrt.QStatus.QS_SUCCESS:
+                raise RuntimeError("Failed to setLogLevel")
+
         qpc = qaicrt.Qpc(qpc_path)
         # Load IO Descriptor
         iodesc = aicapi.IoDesc()
         status, iodesc_data = qpc.getIoDescriptor()
-        assert status == qaicrt.QStatus.QS_SUCCESS, "Failed to getIoDescriptor"
+        if status != qaicrt.QStatus.QS_SUCCESS:
+            raise RuntimeError("Failed to getIoDescriptor")
         iodesc.ParseFromString(bytes(iodesc_data))
         self.allowed_shapes = [
             [(aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
@@ -87,7 +88,8 @@ def __init__(
         if device_ids and len(device_ids) > 1:
             prog_properties.devMapping = ":".join(map(str, device_ids))
         self.program = qaicrt.Program(self.context, None, qpc, prog_properties)
-        assert self.program.load() == qaicrt.QStatus.QS_SUCCESS, "Failed to load program"
+        if self.program.load() != qaicrt.QStatus.QS_SUCCESS:
+            raise RuntimeError("Failed to load program")
         if activate:
             self.activate()
         # Create input qbuffers and buf_dims
@@ -157,11 +159,13 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
         """
         # Set inputs
         self.set_buffers(inputs)
-        assert self.execObj.setData(self.qbuffers, self.buf_dims) == qaicrt.QStatus.QS_SUCCESS, "Failed to setData"
+        if self.execObj.setData(self.qbuffers, self.buf_dims) != qaicrt.QStatus.QS_SUCCESS:
+            raise MemoryError("Failed to setData")
         # # Run with sync API
         # if self.execObj.run(self.qbuffers) != qaicrt.QStatus.QS_SUCCESS:
         # Run with async API
-        assert self.queue.enqueue(self.execObj) == qaicrt.QStatus.QS_SUCCESS, "Failed to enqueue"
+        if self.queue.enqueue(self.execObj) != qaicrt.QStatus.QS_SUCCESS:
+            raise MemoryError("Failed to enqueue")
         if self.execObj.waitForCompletion() != qaicrt.QStatus.QS_SUCCESS:
             error_message = "Failed to run"
             # Print additional error messages for unmatched dimension error
@@ -187,7 +191,8 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
             raise ValueError(error_message)
         # Get output buffers
         status, output_qbuffers = self.execObj.getData()
-        assert status == qaicrt.QStatus.QS_SUCCESS, "Failed to getData"
+        if status != qaicrt.QStatus.QS_SUCCESS:
+            raise MemoryError("Failed to getData")
         # Build output
         outputs = {}
         for output_name in self.output_names:
diff --git a/QEfficient/generation/text_generation_inference.py b/QEfficient/generation/text_generation_inference.py
index 8311cc117..b9d73f53f 100755
--- a/QEfficient/generation/text_generation_inference.py
+++ b/QEfficient/generation/text_generation_inference.py
@@ -157,9 +157,8 @@ def get_compilation_dims(qpc_path: str) -> Tuple[int, int]:
 
 
 def get_input_prompts(prompt: str, prompts_txt_file_path: str) -> List[str]:
-    assert (
-        prompt is not None or prompts_txt_file_path is not None
-    ), "Please pass at least one argument either using --prompt or --prompts_txt_file_path"
+    if prompt is None and prompts_txt_file_path is None:
+        raise ValueError("Please pass at least one argument either using --prompt or --prompts_txt_file_path")
     if prompts_txt_file_path is not None:
         if prompt is not None:
             logger.warning("Found inputs passed using txt file as well as CLI, taking inputs from given txt file")
@@ -444,7 +443,8 @@ def _fetch_generation_len(self, generation_len, max_gen_len):
                 "Passed generation_len is greater than allowed length. "
                 "Make sure this model supports sliding window, such as Mistral"
             )
-        assert generation_len > 0, "generation length should be greater than zero"
+        if generation_len <= 0:
+            raise ValueError("generation length should be greater than zero")
         return generation_len
 
     def prepare_decode_inputs(self):
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
index 5cd058bea..164e08a8b 100644
--- a/QEfficient/transformers/models/modeling_auto.py
+++ b/QEfficient/transformers/models/modeling_auto.py
@@ -211,7 +211,6 @@ def export(self) -> str:
         Returns:
             :str: Path of the generated ``ONNX`` graph.
         """
-        assert self.is_transformed, "Please first run transform on the QEFFAutoModelForCausalLM object"
         # Export
         _, onnx_model_path = QEfficient.export(
             model_name=self.model_card_name,
@@ -366,11 +365,13 @@ def generate(self, prompts: List[str], device_id: List[int] = None, runtime: str
         ``optional`` Args:
             :runtime (str, optional): Only ``AI_100`` runtime is supported as of now; ``ONNXRT`` and ``PyTorch`` coming soon. Defaults to "AI_100".
         """
-        assert Runtime(runtime) == Runtime.AI_100, "Only AI_100 runtime is supported right now via generate API"
+        if Runtime(runtime) != Runtime.AI_100:
+            raise ValueError("Only AI_100 runtime is supported right now via generate API")
         self.run_cloud_ai_100(prompts=prompts, device_id=device_id, **kwargs)
 
     def run_cloud_ai_100(self, prompts: List[str], device_id: List[int] = None, **kwargs):
-        assert isinstance(self.qpc_path, str), "Please run compile API first!"
+        if not isinstance(self.qpc_path, str):
+            raise TypeError("Please run compile API first!")
         generation_len = kwargs.pop("generation_len", None)
         return QEfficient.cloud_ai_100_exec_kv(
             self.tokenizer,
diff --git a/QEfficient/transformers/transform.py b/QEfficient/transformers/transform.py
index e2c65c6c3..8bb084fbf 100644
--- a/QEfficient/transformers/transform.py
+++ b/QEfficient/transformers/transform.py
@@ -75,9 +75,8 @@ def transform_lm(model: nn.Module) -> nn.Module:
 
     # Check with new params hash
     later_params_hash = get_params_hash(model)
-    assert (
-        prior_params_hash == later_params_hash
-    ), "Weights were changed in the transform process, please report an issue"
+    if prior_params_hash != later_params_hash:
+        raise RuntimeError("Weights were changed in the transform process, please report an issue")
 
     # Replace the Dyanmic cache utils update api
     transformers.cache_utils.DynamicCache.update = QEffDynamicCache.update
@@ -94,7 +93,8 @@ def transform(model: QEFFBaseModel, form_factor="cloud"):
         model (torch.nn.Module): object of any instance of class that is child of `QEFFBaseAutoModelFactory`
         form_factor (str): form factor configuration for optimizing the model, available options=["cloud", "edge"].
     """
-    assert form_factor == "cloud", "Only form_factor='cloud' is supported as of now!"
+    if form_factor != "cloud":
+        raise ValueError("Only form_factor='cloud' is supported as of now!")
     # FIXME: move this to class and use model.transform()
     if AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP.get(model.__class__, None) == QEFF_MODEL_TYPE.CAUSALLM:
         transform_lm(model.model)  # type: ignore
diff --git a/QEfficient/utils/_utils.py b/QEfficient/utils/_utils.py
index b0bc7398b..784772d80 100644
--- a/QEfficient/utils/_utils.py
+++ b/QEfficient/utils/_utils.py
@@ -224,7 +224,8 @@ def padding_check_and_fix(tokenizer: Union[PreTrainedTokenizer, PreTrainedTokeni
         tokenizer.padding_side = "right"
 
     if tokenizer.pad_token_id is None:
-        assert tokenizer.eos_token_id is not None, "Found tokenizer.eos_token_id to be None, expected int"
+        if not isinstance(tokenizer.eos_token_id, int):
+            raise TypeError("Found tokenizer.eos_token_id to be None, expected int")
         # If Pad token is out of range of vocab size
        if tokenizer.eos_token_id < tokenizer.vocab_size:
            tokenizer.pad_token_id = tokenizer.eos_token_id
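
The pattern applied throughout the patch can be summarized with a minimal sketch, kept separate from the diff itself. Python strips assert statements when the interpreter runs with optimizations (python -O / PYTHONOPTIMIZE), so assert-based validation of user input can silently disappear, while an explicit raise always executes and gives callers a concrete exception type to catch. The function names and the qpc_dir argument below are hypothetical, used only for illustration.

    import os

    def check_with_assert(qpc_dir: str) -> str:
        # Removed entirely under `python -O`; a bad path slips through unnoticed.
        assert os.path.isdir(qpc_dir), f"{qpc_dir} not found"
        return qpc_dir

    def check_with_raise(qpc_dir: str) -> str:
        # Always enforced; callers can catch FileNotFoundError specifically.
        if not os.path.isdir(qpc_dir):
            raise FileNotFoundError(f"{qpc_dir} not found")
        return qpc_dir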
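
A related sketch, assuming get_hf_model_type remains importable from QEfficient.base.common as shown in the first hunk: the specific exception types introduced here can be asserted directly in a test, instead of matching a bare AssertionError message. The test name and use of pytest's tmp_path fixture are illustrative, not part of the patch.

    import pytest
    from QEfficient.base.common import get_hf_model_type

    def test_get_hf_model_type_rejects_missing_dir(tmp_path):
        # A path that is not a local directory should now raise FileNotFoundError.
        with pytest.raises(FileNotFoundError):
            get_hf_model_type(str(tmp_path / "does_not_exist"))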