Commit 2186b0a
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] authored and chensuyue committed Dec 19, 2024
1 parent c937884 commit 2186b0a
Showing 12 changed files with 93 additions and 28 deletions.
14 changes: 14 additions & 0 deletions neural_compressor/torch/algorithms/fp8_quant/model_configs.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from typing import Dict, Optional, Tuple, Any
from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator

14 changes: 14 additions & 0 deletions neural_compressor/torch/algorithms/fp8_quant/observer.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
"""Base class and helper functions for registering observers."""

from typing import Dict, Optional, Any
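Note: observer.py's docstring describes helper functions for registering observers. As a rough sketch of the registry pattern such helpers usually follow (the names below are illustrative, not the actual fp8_quant API):

from typing import Dict, Type

# Illustrative registry only; the real helper names in fp8_quant.observer may differ.
_OBSERVERS: Dict[str, Type] = {}

def register_observer(name: str):
    """Class decorator that records an observer class under a string key."""
    def decorator(cls):
        _OBSERVERS[name] = cls
        return cls
    return decorator

@register_observer("minmax")
class MinMaxObserver:
    """Tracks the running min/max of observed tensors."""
    def __init__(self):
        self.min_val = None
        self.max_val = None

observer_cls = _OBSERVERS["minmax"]  # lookup by registered name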
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
"""Base class for patched modules and helper functions for registering patched modules."""

from typing import Union, List, Type, Optional
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
"""Base class and helper functions for registering scaling methods."""

from typing import Dict, Optional, Any
14 changes: 9 additions & 5 deletions neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -22,7 +22,7 @@

import torch

-from neural_compressor.common.utils import save_config_mapping, AWQ, TEQ
+from neural_compressor.common.utils import AWQ, TEQ, save_config_mapping
from neural_compressor.torch.utils import (
HPU_SAFE_WEIGHTS_NAME,
HPU_WEIGHT_NAME,
@@ -36,7 +36,10 @@
from .modules import HPUWeightOnlyLinear, INCWeightOnlyLinear, MulLinear
from .utility import convert_dtype_str2torch

-format_woqlinear_mapping = {SaveLoadFormat.HUGGINGFACE: INCWeightOnlyLinear, SaveLoadFormat.DEFAULT: INCWeightOnlyLinear}
+format_woqlinear_mapping = {
+    SaveLoadFormat.HUGGINGFACE: INCWeightOnlyLinear,
+    SaveLoadFormat.DEFAULT: INCWeightOnlyLinear,
+}
device_woqlinear_mapping = {"cpu": INCWeightOnlyLinear, "hpu": HPUWeightOnlyLinear}


@@ -199,7 +202,7 @@ def load_inc_format_woq_model(self):
model = self._build_woq_model()

# load remaining pretrained weight to weight-only quantization model
-        is_meta_device = hasattr(self.original_model, "device") and self.original_model.device.type == 'meta'
+        is_meta_device = hasattr(self.original_model, "device") and self.original_model.device.type == "meta"
algo_name = next(iter(self.quantization_config[next(iter(self.quantization_config))].keys()))
if is_meta_device or algo_name in [AWQ, TEQ]:
# AWQ and TEQ will update some weight except WOQLinear to handle additional input_scale
@@ -297,7 +300,7 @@ def _load_data_to_new_module_hqq(self, new_module, module_name):
new_module_state_dict = {}
for key in self.loaded_state_dict:
if key.startswith(module_name):
-                new_key = key[len(module_name) + 1:]  # Remove module_name and the following dot
+                new_key = key[len(module_name) + 1 :]  # Remove module_name and the following dot
new_module_state_dict[new_key] = self.loaded_state_dict[key]
self.loaded_state_dict_keys.remove(key)
new_module.load_state_dict(new_module_state_dict, strict=False)
@@ -863,7 +866,7 @@ def _load_remaining_pretrained_weight(self, model):
for shard_file in resolved_archive_file:
state_dict = load_state_dict(shard_file)

-            params_dict={
+            params_dict = {
"model": model,
"state_dict": state_dict,
"start_prefix": "",
@@ -877,6 +880,7 @@ def _load_remaining_pretrained_weight(self, model):
}

            import transformers
+
if transformers.__version__ < "4.45.0":
params_dict["loaded_state_dict_keys"] = self.loaded_state_dict_keys

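Note: the slice reformat in _load_data_to_new_module_hqq is whitespace-only; key[len(module_name) + 1 :] still strips the module prefix and its trailing dot from each state-dict key. A standalone check of that behavior (the names below are hypothetical):

# `key[len(module_name) + 1:]` and `key[len(module_name) + 1 :]` are the same slice.
module_name = "model.layers.0.mlp"      # hypothetical module path
key = "model.layers.0.mlp.qweight"      # hypothetical state-dict key

new_key = key[len(module_name) + 1 :]   # drop "model.layers.0.mlp."
assert new_key == "qweight"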
2 changes: 1 addition & 1 deletion neural_compressor/torch/quantization/quantize.py
@@ -20,7 +20,7 @@

from neural_compressor.common.base_config import BaseConfig, ComposableConfig, config_registry
from neural_compressor.common.utils import Mode, call_counter, log_process
-from neural_compressor.torch.quantization.config import SmoothQuantConfig, INT8StaticQuantConfig
+from neural_compressor.torch.quantization.config import INT8StaticQuantConfig, SmoothQuantConfig
from neural_compressor.torch.utils import is_ipex_available, logger
from neural_compressor.torch.utils.utility import WHITE_MODULE_LIST, algos_mapping, get_model_info

15 changes: 11 additions & 4 deletions neural_compressor/torch/quantization/save_load_entry.py
@@ -32,6 +32,7 @@
FP8_QUANT: FP8Config,
}

+
def save(model, checkpoint_dir="saved_results", format="default"):
"""Save quantized model.
@@ -46,6 +47,7 @@ def save(model, checkpoint_dir="saved_results", format="default"):
# fp8_quant
if isinstance(config_object, FP8Config):
        from neural_compressor.torch.algorithms import fp8_quant
+
format = SaveLoadFormat.HUGGINGFACE.value # TODO: support default format for FP8 algorithm
fp8_quant.save(model, checkpoint_dir, format)
else:
@@ -120,21 +122,26 @@ def load(model_name_or_path, original_model=None, format="default", device="cpu"
): # WOQ
from neural_compressor.torch.algorithms import weight_only

-        qmodel = weight_only.load(model_name_or_path, original_model, format=SaveLoadFormat.DEFAULT, device=device)
+        qmodel = weight_only.load(
+            model_name_or_path, original_model, format=SaveLoadFormat.DEFAULT, device=device
+        )
return qmodel.to(device)
elif format == SaveLoadFormat.HUGGINGFACE.value:
import transformers

config = transformers.AutoConfig.from_pretrained(model_name_or_path, **kwargs)
# use config to check which algorithm is used.
        if (
-            "fp8_config" in config.quantization_config or
+            "fp8_config" in config.quantization_config
+            or
            # for FP8 LLMs for vLLM (https://huggingface.co/neuralmagic).
            (
-                "quant_method" in config.quantization_config and
-                config.quantization_config["quant_method"] in ["fp8", "compressed-tensors"]
+                "quant_method" in config.quantization_config
+                and config.quantization_config["quant_method"] in ["fp8", "compressed-tensors"]
            )
        ):
            from neural_compressor.torch.algorithms import fp8_quant
+
return fp8_quant.load(model_name_or_path, format=format, device=device, **kwargs)
else:
from neural_compressor.torch.algorithms import weight_only
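Note: the reflowed condition in load is behavior-preserving; it still selects the FP8 loader when quantization_config either contains an fp8_config entry or declares quant_method as fp8/compressed-tensors. A minimal sketch of the predicate, assuming a plain dict:

def uses_fp8_loader(quantization_config: dict) -> bool:
    # Mirrors the dispatch condition in save_load_entry.load (sketch only).
    return (
        "fp8_config" in quantization_config
        or (
            "quant_method" in quantization_config
            and quantization_config["quant_method"] in ["fp8", "compressed-tensors"]
        )
    )

assert uses_fp8_loader({"quant_method": "compressed-tensors"})
assert not uses_fp8_loader({"quant_method": "awq"})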
3 changes: 1 addition & 2 deletions neural_compressor/torch/utils/auto_accelerator.py
@@ -26,12 +26,12 @@

import os
from abc import ABC, abstractmethod
+from functools import lru_cache
from typing import Any, Callable, List

import torch

from neural_compressor.common.utils import LazyImport, logger
-from functools import lru_cache

htcore = LazyImport("habana_frameworks.torch.core")

@@ -151,7 +151,6 @@ def synchronize(self):
        pass


-
@register_accelerator(name="cpu", priority=PRIORITY_CPU)
class CPU_Accelerator(Auto_Accelerator):
"""CPU Accelerator."""
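Note: moving from functools import lru_cache up is isort ordering only. In this module, lru_cache presumably memoizes accelerator detection so the probe runs once; a sketch of that pattern with a hypothetical detector:

from functools import lru_cache

@lru_cache(maxsize=None)
def detect_accelerator_name() -> str:
    # Hypothetical stand-in for the real probe: runs once, then every
    # later call returns the cached result.
    import torch
    return "cuda" if torch.cuda.is_available() else "cpu"

assert detect_accelerator_name() == detect_accelerator_name()  # second call is cached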
12 changes: 6 additions & 6 deletions neural_compressor/torch/utils/block_wise.py
@@ -11,23 +11,23 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This utility is for block-wise calibration of LLMs"""
"""This utility is for block-wise calibration of LLMs."""

import torch
import gc
from functools import partial

import torch

from neural_compressor.torch.utils import (
    fetch_module,
-    logger,
-    set_module,
-    get_accelerator,
    forward_wrapper,
+    get_accelerator,
    get_non_persistent_buffers,
    load_non_persistent_buffers,
+    logger,
+    set_module,
)


cur_accelerator = get_accelerator()


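Note: per its docstring, block_wise.py calibrates LLMs one block at a time; the reordered imports (get_accelerator, get_non_persistent_buffers, load_non_persistent_buffers) suggest buffers are snapshotted around device moves. A rough sketch of the block-at-a-time idea, not the module's actual code:

import torch

def calibrate_block_wise(blocks, sample, device="cpu"):
    # Move one block at a time to the target device, run the calibration
    # sample through it, then evict it, keeping peak memory near one block.
    hidden = sample
    for block in blocks:
        block.to(device)
        with torch.no_grad():
            hidden = block(hidden.to(device))
        block.to("cpu")
    return hidden

blocks = [torch.nn.Linear(8, 8) for _ in range(3)]
out = calibrate_block_wise(blocks, torch.randn(2, 8))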
4 changes: 3 additions & 1 deletion neural_compressor/torch/utils/environ.py
@@ -229,8 +229,9 @@ def is_tbb_available():  # pragma: no cover

def get_used_hpu_mem_MB():
"""Get HPU used memory: MiB."""
-    from habana_frameworks.torch.hpu import memory_stats
    import numpy as np
+    from habana_frameworks.torch.hpu import memory_stats
+
torch.hpu.synchronize()
mem_stats = memory_stats()
used_hpu_mem = np.round(mem_stats["InUse"] / 1024**2, 3)
@@ -240,6 +241,7 @@ def get_used_hpu_mem_MB():
def get_used_cpu_mem_MB():
"""Get the amount of CPU memory used by the current process in MiB (Mebibytes)."""
    import psutil
+
process = psutil.Process()
mem_info = process.memory_info()
used_cpu_mem = round(mem_info.rss / 1024**2, 3)
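Note: get_used_cpu_mem_MB reports the process resident set size in MiB. A self-contained equivalent of the conversion (requires psutil):

import psutil

def used_cpu_mem_mib() -> float:
    # RSS in bytes -> MiB (1 MiB = 1024**2 bytes), rounded to 3 places
    # as in environ.py.
    rss = psutil.Process().memory_info().rss
    return round(rss / 1024**2, 3)

print(f"current process RSS: {used_cpu_mem_mib()} MiB")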
13 changes: 5 additions & 8 deletions neural_compressor/torch/utils/utility.py
@@ -627,12 +627,11 @@ def find_matching_blocks(model, all_blocks, to_quant_block_names=None):


def get_non_persistent_buffers(model):
"""
Get all non-persistent buffers in the model.
"""Get all non-persistent buffers in the model.
Args:
model (torch.nn.Module): PyTorch model
Returns:
dict: A dictionary containing all non-persistent buffers, {buffer_names: buffer_tensors}
"""
@@ -646,13 +645,11 @@ def get_non_persistent_buffers(model):


def load_non_persistent_buffers(model, non_persistent_buffers):
"""
Load all non-persistent buffers into the model.
"""Load all non-persistent buffers into the model.
Args:
model (torch.nn.Module): PyTorch model
non_persistent_buffers (dict): A dictionary containing all non-persistent buffers, {buffer_names: buffer_tensors}
"""
for full_name, buffer in non_persistent_buffers.items():
module_name, buffer_name = full_name
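Note: for context on these helpers, PyTorch excludes buffers registered with persistent=False from state_dict(), so they vanish on a plain save/load round-trip unless captured separately, which is what get_non_persistent_buffers/load_non_persistent_buffers appear to handle. A small demonstration of the underlying behavior:

import torch

class WithBuffers(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer("kept", torch.ones(2))  # persistent (default)
        self.register_buffer("scratch", torch.zeros(2), persistent=False)

m = WithBuffers()
assert "kept" in m.state_dict()
assert "scratch" not in m.state_dict()  # non-persistent: needs separate handling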
2 changes: 1 addition & 1 deletion test/3x/torch/quantization/fp8_quant/conftest.py
@@ -1,4 +1,4 @@
# Called once at the beginning of the test session
def pytest_sessionstart():
    import os
-    os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE")
\ No newline at end of file
+    os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE")
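Note: os.environ.setdefault writes the value only when the variable is unset, so an EXPERIMENTAL_WEIGHT_SHARING value exported by the user survives this session hook:

import os

os.environ.pop("DEMO_FLAG", None)
os.environ.setdefault("DEMO_FLAG", "FALSE")  # unset -> default is applied
assert os.environ["DEMO_FLAG"] == "FALSE"

os.environ["DEMO_FLAG"] = "TRUE"
os.environ.setdefault("DEMO_FLAG", "FALSE")  # already set -> left unchanged
assert os.environ["DEMO_FLAG"] == "TRUE"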
