Commit 2186b0a
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] authored and chensuyue committed Dec 19, 2024
1 parent c937884 commit 2186b0a
Showing 12 changed files with 93 additions and 28 deletions.
14 changes: 14 additions & 0 deletions neural_compressor/torch/algorithms/fp8_quant/model_configs.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from typing import Dict, Optional, Tuple, Any
from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator

14 changes: 14 additions & 0 deletions neural_compressor/torch/algorithms/fp8_quant/observer.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
"""Base class and helper functions for registering observers."""

from typing import Dict, Optional, Any
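Note: observer.py's docstring describes helper functions for registering observers. As a rough sketch of the registry pattern such helpers usually follow (the names below are illustrative, not the actual fp8_quant API):

from typing import Dict, Type

# Illustrative registry only; the real helper names in fp8_quant.observer may differ.
_OBSERVERS: Dict[str, Type] = {}

def register_observer(name: str):
    """Class decorator that records an observer class under a string key."""
    def decorator(cls):
        _OBSERVERS[name] = cls
        return cls
    return decorator

@register_observer("minmax")
class MinMaxObserver:
    """Tracks the running min/max of observed tensors."""
    def __init__(self):
        self.min_val = None
        self.max_val = None

observer_cls = _OBSERVERS["minmax"]  # lookup by registered name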
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
"""Base class for patched modules and helper functions for registering patched modules."""

from typing import Union, List, Type, Optional
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
"""Base class and helper functions for registering scaling methods."""

from typing import Dict, Optional, Any
14 changes: 9 additions & 5 deletions neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -22,7 +22,7 @@

import torch

-from neural_compressor.common.utils import save_config_mapping, AWQ, TEQ
+from neural_compressor.common.utils import AWQ, TEQ, save_config_mapping
from neural_compressor.torch.utils import (
HPU_SAFE_WEIGHTS_NAME,
HPU_WEIGHT_NAME,
@@ -36,7 +36,10 @@
from .modules import HPUWeightOnlyLinear, INCWeightOnlyLinear, MulLinear
from .utility import convert_dtype_str2torch

-format_woqlinear_mapping = {SaveLoadFormat.HUGGINGFACE: INCWeightOnlyLinear, SaveLoadFormat.DEFAULT: INCWeightOnlyLinear}
+format_woqlinear_mapping = {
+    SaveLoadFormat.HUGGINGFACE: INCWeightOnlyLinear,
+    SaveLoadFormat.DEFAULT: INCWeightOnlyLinear,
+}
device_woqlinear_mapping = {"cpu": INCWeightOnlyLinear, "hpu": HPUWeightOnlyLinear}


@@ -199,7 +202,7 @@ def load_inc_format_woq_model(self):
model = self._build_woq_model()

# load remaining pretrained weight to weight-only quantization model
-        is_meta_device = hasattr(self.original_model, "device") and self.original_model.device.type == 'meta'
+        is_meta_device = hasattr(self.original_model, "device") and self.original_model.device.type == "meta"
algo_name = next(iter(self.quantization_config[next(iter(self.quantization_config))].keys()))
if is_meta_device or algo_name in [AWQ, TEQ]:
# AWQ and TEQ will update some weight except WOQLinear to handle additional input_scale
@@ -297,7 +300,7 @@ def _load_data_to_new_module_hqq(self, new_module, module_name):
new_module_state_dict = {}
for key in self.loaded_state_dict:
if key.startswith(module_name):
-                new_key = key[len(module_name) + 1:]  # Remove module_name and the following dot
+                new_key = key[len(module_name) + 1 :]  # Remove module_name and the following dot
new_module_state_dict[new_key] = self.loaded_state_dict[key]
self.loaded_state_dict_keys.remove(key)
new_module.load_state_dict(new_module_state_dict, strict=False)
@@ -863,7 +866,7 @@ def _load_remaining_pretrained_weight(self, model):
for shard_file in resolved_archive_file:
state_dict = load_state_dict(shard_file)

-            params_dict={
+            params_dict = {
"model": model,
"state_dict": state_dict,
"start_prefix": "",
@@ -877,6 +880,7 @@ def _load_remaining_pretrained_weight(self, model):
}

            import transformers
+
if transformers.__version__ < "4.45.0":
params_dict["loaded_state_dict_keys"] = self.loaded_state_dict_keys

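Note: the slice reformat in _load_data_to_new_module_hqq is whitespace-only; key[len(module_name) + 1 :] still strips the module prefix and its trailing dot from each state-dict key. A standalone check of that behavior (the names below are hypothetical):

# `key[len(module_name) + 1:]` and `key[len(module_name) + 1 :]` are the same slice.
module_name = "model.layers.0.mlp"      # hypothetical module path
key = "model.layers.0.mlp.qweight"      # hypothetical state-dict key

new_key = key[len(module_name) + 1 :]   # drop "model.layers.0.mlp."
assert new_key == "qweight"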
2 changes: 1 addition & 1 deletion neural_compressor/torch/quantization/quantize.py
@@ -20,7 +20,7 @@

from neural_compressor.common.base_config import BaseConfig, ComposableConfig, config_registry
from neural_compressor.common.utils import Mode, call_counter, log_process
-from neural_compressor.torch.quantization.config import SmoothQuantConfig, INT8StaticQuantConfig
+from neural_compressor.torch.quantization.config import INT8StaticQuantConfig, SmoothQuantConfig
from neural_compressor.torch.utils import is_ipex_available, logger
from neural_compressor.torch.utils.utility import WHITE_MODULE_LIST, algos_mapping, get_model_info

15 changes: 11 additions & 4 deletions neural_compressor/torch/quantization/save_load_entry.py
@@ -32,6 +32,7 @@
FP8_QUANT: FP8Config,
}

+
def save(model, checkpoint_dir="saved_results", format="default"):
"""Save quantized model.
@@ -46,6 +47,7 @@ def save(model, checkpoint_dir="saved_results", format="default"):
# fp8_quant
if isinstance(config_object, FP8Config):
        from neural_compressor.torch.algorithms import fp8_quant
+
format = SaveLoadFormat.HUGGINGFACE.value # TODO: support default format for FP8 algorithm
fp8_quant.save(model, checkpoint_dir, format)
else:
@@ -120,21 +122,26 @@ def load(model_name_or_path, original_model=None, format="default", device="cpu"
): # WOQ
from neural_compressor.torch.algorithms import weight_only

-        qmodel = weight_only.load(model_name_or_path, original_model, format=SaveLoadFormat.DEFAULT, device=device)
+        qmodel = weight_only.load(
+            model_name_or_path, original_model, format=SaveLoadFormat.DEFAULT, device=device
+        )
return qmodel.to(device)
elif format == SaveLoadFormat.HUGGINGFACE.value:
import transformers

config = transformers.AutoConfig.from_pretrained(model_name_or_path, **kwargs)
# use config to check which algorithm is used.
        if (
-            "fp8_config" in config.quantization_config or
+            "fp8_config" in config.quantization_config
+            or
            # for FP8 LLMs for vLLM (https://huggingface.co/neuralmagic).
            (
-                "quant_method" in config.quantization_config and
-                config.quantization_config["quant_method"] in ["fp8", "compressed-tensors"]
+                "quant_method" in config.quantization_config
+                and config.quantization_config["quant_method"] in ["fp8", "compressed-tensors"]
            )
        ):
            from neural_compressor.torch.algorithms import fp8_quant
+
return fp8_quant.load(model_name_or_path, format=format, device=device, **kwargs)
else:
from neural_compressor.torch.algorithms import weight_only
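Note: the reflowed condition in load is behavior-preserving; it still selects the FP8 loader when quantization_config either contains an fp8_config entry or declares quant_method as fp8/compressed-tensors. A minimal sketch of the predicate, assuming a plain dict:

def uses_fp8_loader(quantization_config: dict) -> bool:
    # Mirrors the dispatch condition in save_load_entry.load (sketch only).
    return (
        "fp8_config" in quantization_config
        or (
            "quant_method" in quantization_config
            and quantization_config["quant_method"] in ["fp8", "compressed-tensors"]
        )
    )

assert uses_fp8_loader({"quant_method": "compressed-tensors"})
assert not uses_fp8_loader({"quant_method": "awq"})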
3 changes: 1 addition & 2 deletions neural_compressor/torch/utils/auto_accelerator.py
@@ -26,12 +26,12 @@

import os
from abc import ABC, abstractmethod
+from functools import lru_cache
from typing import Any, Callable, List

import torch

from neural_compressor.common.utils import LazyImport, logger
-from functools import lru_cache

htcore = LazyImport("habana_frameworks.torch.core")

@@ -151,7 +151,6 @@ def synchronize(self):
        pass


-
@register_accelerator(name="cpu", priority=PRIORITY_CPU)
class CPU_Accelerator(Auto_Accelerator):
"""CPU Accelerator."""
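Note: moving from functools import lru_cache up is isort ordering only. In this module, lru_cache presumably memoizes accelerator detection so the probe runs once; a sketch of that pattern with a hypothetical detector:

from functools import lru_cache

@lru_cache(maxsize=None)
def detect_accelerator_name() -> str:
    # Hypothetical stand-in for the real probe: runs once, then every
    # later call returns the cached result.
    import torch
    return "cuda" if torch.cuda.is_available() else "cpu"

assert detect_accelerator_name() == detect_accelerator_name()  # second call is cached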
12 changes: 6 additions & 6 deletions neural_compressor/torch/utils/block_wise.py
@@ -11,23 +11,23 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This utility is for block-wise calibration of LLMs"""
"""This utility is for block-wise calibration of LLMs."""

import torch
import gc
from functools import partial

import torch

from neural_compressor.torch.utils import (
    fetch_module,
-    logger,
-    set_module,
-    get_accelerator,
    forward_wrapper,
+    get_accelerator,
    get_non_persistent_buffers,
    load_non_persistent_buffers,
+    logger,
+    set_module,
)


cur_accelerator = get_accelerator()


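Note: per its docstring, block_wise.py calibrates LLMs one block at a time; the reordered imports (get_accelerator, get_non_persistent_buffers, load_non_persistent_buffers) suggest buffers are snapshotted around device moves. A rough sketch of the block-at-a-time idea, not the module's actual code:

import torch

def calibrate_block_wise(blocks, sample, device="cpu"):
    # Move one block at a time to the target device, run the calibration
    # sample through it, then evict it, keeping peak memory near one block.
    hidden = sample
    for block in blocks:
        block.to(device)
        with torch.no_grad():
            hidden = block(hidden.to(device))
        block.to("cpu")
    return hidden

blocks = [torch.nn.Linear(8, 8) for _ in range(3)]
out = calibrate_block_wise(blocks, torch.randn(2, 8))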
4 changes: 3 additions & 1 deletion neural_compressor/torch/utils/environ.py
@@ -229,8 +229,9 @@ def is_tbb_available():  # pragma: no cover

def get_used_hpu_mem_MB():
"""Get HPU used memory: MiB."""
-    from habana_frameworks.torch.hpu import memory_stats
    import numpy as np
+    from habana_frameworks.torch.hpu import memory_stats
+
torch.hpu.synchronize()
mem_stats = memory_stats()
used_hpu_mem = np.round(mem_stats["InUse"] / 1024**2, 3)
@@ -240,6 +241,7 @@ def get_used_hpu_mem_MB():
def get_used_cpu_mem_MB():
"""Get the amount of CPU memory used by the current process in MiB (Mebibytes)."""
    import psutil
+
process = psutil.Process()
mem_info = process.memory_info()
used_cpu_mem = round(mem_info.rss / 1024**2, 3)
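Note: get_used_cpu_mem_MB reports the process resident set size in MiB. A self-contained equivalent of the conversion (requires psutil):

import psutil

def used_cpu_mem_mib() -> float:
    # RSS in bytes -> MiB (1 MiB = 1024**2 bytes), rounded to 3 places
    # as in environ.py.
    rss = psutil.Process().memory_info().rss
    return round(rss / 1024**2, 3)

print(f"current process RSS: {used_cpu_mem_mib()} MiB")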
13 changes: 5 additions & 8 deletions neural_compressor/torch/utils/utility.py
@@ -627,12 +627,11 @@ def find_matching_blocks(model, all_blocks, to_quant_block_names=None):


def get_non_persistent_buffers(model):
"""
Get all non-persistent buffers in the model.
"""Get all non-persistent buffers in the model.
Args:
model (torch.nn.Module): PyTorch model
Returns:
dict: A dictionary containing all non-persistent buffers, {buffer_names: buffer_tensors}
"""
@@ -646,13 +645,11 @@ def get_non_persistent_buffers(model):


def load_non_persistent_buffers(model, non_persistent_buffers):
"""
Load all non-persistent buffers into the model.
"""Load all non-persistent buffers into the model.
Args:
model (torch.nn.Module): PyTorch model
non_persistent_buffers (dict): A dictionary containing all non-persistent buffers, {buffer_names: buffer_tensors}
"""
for full_name, buffer in non_persistent_buffers.items():
module_name, buffer_name = full_name
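Note: for context on these helpers, PyTorch excludes buffers registered with persistent=False from state_dict(), so they vanish on a plain save/load round-trip unless captured separately, which is what get_non_persistent_buffers/load_non_persistent_buffers appear to handle. A small demonstration of the underlying behavior:

import torch

class WithBuffers(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer("kept", torch.ones(2))  # persistent (default)
        self.register_buffer("scratch", torch.zeros(2), persistent=False)

m = WithBuffers()
assert "kept" in m.state_dict()
assert "scratch" not in m.state_dict()  # non-persistent: needs separate handling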
2 changes: 1 addition & 1 deletion test/3x/torch/quantization/fp8_quant/conftest.py
@@ -1,4 +1,4 @@
# Called once at the beginning of the test session
def pytest_sessionstart():
    import os
-    os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE")
\ No newline at end of file
+    os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE")
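Note: os.environ.setdefault writes the value only when the variable is unset, so an EXPERIMENTAL_WEIGHT_SHARING value exported by the user survives this session hook:

import os

os.environ.pop("DEMO_FLAG", None)
os.environ.setdefault("DEMO_FLAG", "FALSE")  # unset -> default is applied
assert os.environ["DEMO_FLAG"] == "FALSE"

os.environ["DEMO_FLAG"] = "TRUE"
os.environ.setdefault("DEMO_FLAG", "FALSE")  # already set -> left unchanged
assert os.environ["DEMO_FLAG"] == "TRUE"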
