Skip to content

Commit

Permalink
Revert "some changes to support fine-tuning on Intel GPU (#88)" (#95)
Browse files Browse the repository at this point in the history
This reverts commit a555e0c.
  • Loading branch information
xwu99 authored Feb 4, 2024
1 parent bcd5d08 commit 63464ed
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 102 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/workflow_finetune.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ on:
default: '10.1.2.13:5000/llmray-build'
http_proxy:
type: string
default: 'http://10.24.221.149:911'
default: 'http://proxy-chain.intel.com:911'
https_proxy:
type: string
default: 'http://10.24.221.149:911'
default: 'http://proxy-chain.intel.com:911'
runner_config_path:
type: string
default: '/home/ci/llm-ray-actions-runner'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/workflow_inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ on:
default: '10.1.2.13:5000/llmray-build'
http_proxy:
type: string
default: 'http://10.24.221.149:911'
default: 'http://proxy-chain.intel.com:911'
https_proxy:
type: string
default: 'http://10.24.221.149:911'
default: 'http://proxy-chain.intel.com:911'
runner_config_path:
type: string
default: '/home/ci/llm-ray-actions-runner'
Expand Down
13 changes: 1 addition & 12 deletions common/model/huggingface_model_for_causal_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,13 @@
class HuggingFaceModelForCausalLM(Model):
def __call__(self, config):
name = config.get("name")
model_dtype = config.get("dtype")
model_config = config.get("config", {})
model = transformers.AutoModelForCausalLM.from_pretrained(
name, torch_dtype=model_dtype, **model_config
)

model = transformers.AutoModelForCausalLM.from_pretrained(name, **model_config)
lora_config = config.get("lora_config", None)
if lora_config:
peft_config = LoraConfig(**lora_config)
model = get_peft_model(model, peft_config)
deltatuner_config = config.get("deltatuner_config", None)
if deltatuner_config:
model = deltatuner.optimize(model, **deltatuner_config)

enable_gradient_checkpointing = config.get("enable_gradient_checkpointing")
if enable_gradient_checkpointing:
model.enable_input_require_grads()
model.gradient_checkpointing_enable()
model.config.use_cache = False

return model
23 changes: 5 additions & 18 deletions common/trainer/default_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def _get_lr_scheduler(
num_steps_per_epoch,
accelerator,
):
# gradient_accumulation_steps = accelerator.gradient_accumulation_steps
# num_update_steps_per_epoch = math.ceil(num_steps_per_epoch / gradient_accumulation_steps)
enable = lr_scheduler_config.get("enable", False)
if not enable:
return None
Expand Down Expand Up @@ -151,7 +153,7 @@ def prepare(self, model, tokenizer, dataset, optimizer, accelerator):
def train(self):
num_train_epochs = self.config.get("num_train_epochs", 1)
checkpoint = self.config.get("checkpoint")
logging_steps = self.config.get("logging_steps", 1)
log_step = self.config.get("log_step", 1)
max_train_step = self.config.get("max_train_step")
max_eval_step = self.config.get("max_eval_step")
for idx in range(self.starting_epoch, num_train_epochs, 1):
Expand All @@ -168,17 +170,12 @@ def train(self):
if self.lr_scheduler is not None:
self.lr_scheduler.step()
self.optimizer.zero_grad()

if step % logging_steps == 0:
loss = loss.item()
ppl = math.exp(loss)
if step % log_step == 0:
logger.info(
f"train epoch:[{idx}/{num_train_epochs}]\tstep:[{step}/{total_steps}]\tloss:{loss:.6f}\tppl:{ppl:.6f}\ttime:{time.time()-start:.6f}"
f"train epoch:[{idx}/{num_train_epochs}]\tstep:[{step}/{total_steps}]\tloss:{loss:.6f}\tppl:{math.exp(loss):.6f}\ttime:{time.time()-start:.6f}"
)
report(
{
"loss": loss,
"ppl": ppl,
"train_epoch": idx,
"total_epochs": num_train_epochs,
"train_step": step,
Expand All @@ -187,10 +184,6 @@ def train(self):
else total_steps,
}
)
self.accelerator.log(
{"train loss": loss, "train perplexity": ppl},
step=idx * total_steps + step,
)
start = time.time()
if max_train_step is not None:
if step >= max_train_step - 1:
Expand Down Expand Up @@ -221,9 +214,6 @@ def train(self):
except OverflowError:
eval_loss = float("inf")
perplexity = float("inf")
self.accelerator.log(
{"evaluate loss": eval_loss, "evaluate perplexity": perplexity}
)
logger.info(
f"eval epoch:[{idx}/{num_train_epochs}]\tloss:[{eval_loss:.6f}]\tppl:[{perplexity:.6f}]\ttime:[{time.time()-start:.6f}]"
)
Expand All @@ -242,9 +232,6 @@ def train(self):
save_function=self.accelerator.save,
)
logger.info(f"finish save model to {output}")

self.accelerator.end_training()

self.accelerator.wait_for_everyone()

def _get_local_path(self, root_path, model_name):
Expand Down
4 changes: 2 additions & 2 deletions common/trainer/rm_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def compute_loss(self, batch, return_outputs=False):

def train(self):
num_train_epochs = self.config.get("num_train_epochs", 1)
logging_steps = self.config.get("logging_steps", 1)
log_step = self.config.get("log_step", 1)
if not os.path.exists(self.config.get("log_path", ".")):
os.makedirs(self.config.get("log_path", "."), exist_ok=True)
writer = SummaryWriter(self.config.get("log_path", "."))
Expand All @@ -69,7 +69,7 @@ def train(self):
if self.lr_scheduler is not None:
self.lr_scheduler.step()
self.optimizer.zero_grad()
if step % logging_steps == 0:
if step % log_step == 0:
logger.info(
f"train epoch:[{idx}/{num_train_epochs}]\tstep:[{step}/{len(self.train_dataloader)}]\tloss:{loss}\tppl:{math.exp(loss)}\ttime:{time.time()-start}"
)
Expand Down
3 changes: 0 additions & 3 deletions docs/finetune_parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ The following are the parameters supported in the finetuning workflow.
|gpt_base_model|True|This parameter is for [Transformers#22482](https://github.com/huggingface/transformers/issues/22482). It needs to be set to True when the pretrained model is related to GPT; otherwise, set it to False.|
|output_dir|/tmp/llm-ray/output|The output directory to store the finetuned model|
|checkpoint_dir|/tmp/llm-ray/checkpoint|The directory to store checkpoint|
|tracking_dir|/tmp/llm-ray/tracking|The path to a directory for storing logs of locally-compatible loggers|
|config|trust_remote_code: False<br> use_auth_token: None|Will be passed to the transformers `from_pretrained()` method|
|lora_config|task_type: CAUSAL_LM<br>r: 8<br>lora_alpha: 32<br>lora_dropout: 0.1|Will be passed to the LoraConfig `__init__()` method, then it'll be used as config to build Peft model object.|
|deltatuner_config|"algo": "lora"<br>"denas": True<br>"best_model_structure": "/path/to/best_structure_of_deltatuner_model"|Will be passed to the DeltaTunerArguments `__init__()` method, then it'll be used as config to build [Deltatuner model](https://github.com/intel/e2eAIOK/tree/main/e2eAIOK/deltatuner) object.|
|enable_gradient_checkpointing|False|enable gradient checkpointing to save GPU memory, but will cost more compute runtime|


## Dataset Parameters
Expand Down Expand Up @@ -42,4 +40,3 @@ The following are the parameters supported in the finetuning workflow.
|max_train_steps|None|Total number of training steps to perform. If provided, overrides epochs.|
|gradient_accumulation_steps|1|Number of update steps to accumulate before performing a backward/update pass.|
|seed|None|A seed for reproducible training.|
|logging_steps|10|logging per steps|
53 changes: 9 additions & 44 deletions finetune/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import argparse
from typing import Any, Dict, Union

import torch
import accelerate
from accelerate.utils import is_xpu_available

Expand Down Expand Up @@ -63,14 +62,6 @@ def get_accelerate_environment_variable(mode: str, config: Union[Dict[str, Any],
return mode_env_vars[mode]


def convert_dtype(dtype: str) -> torch.dtype:
    """Translate a precision string from the training config into a ``torch.dtype``.

    Args:
        dtype: Precision name. ``"fp16"``, ``"bf16"`` and ``"fp32"`` map to the
            matching torch floating-point types. ``"no"`` (meaning "no mixed
            precision") maps to ``torch.float32``.

    Returns:
        The ``torch.dtype`` corresponding to ``dtype``.

    Raises:
        ValueError: If ``dtype`` is not one of the supported names.
    """
    supported_dtypes = {
        "fp16": torch.float16,
        "bf16": torch.bfloat16,
        "fp32": torch.float32,
        # The config schema defaults ``mixed_precision`` to "no" and that value
        # is passed straight into this function, so it must not be rejected.
        # Full precision is the natural reading of "no mixed precision".
        "no": torch.float32,
    }
    try:
        return supported_dtypes[dtype]
    except KeyError:
        # Join the names explicitly: interpolating ``.keys()`` would render as
        # "dict_keys([...])" in the message. Also report the offending value.
        names = ", ".join(supported_dtypes)
        raise ValueError(
            f"unsupported dtype {dtype!r}; only supported torch.dtype list [{names}]"
        ) from None


def train_func(config: Dict[str, Any]):
cwd = config.get("cwd")
if cwd:
Expand All @@ -88,26 +79,9 @@ def train_func(config: Dict[str, Any]):
)
else:
fsdp_plugin = None

log_with = "tensorboard" # only support tensorboard as tracker
output_dir = config["General"]["output_dir"]
tracking_dir = config["General"]["tracking_dir"]
accelerator = accelerate.Accelerator(
gradient_accumulation_steps=gradient_accumulation_steps,
fsdp_plugin=fsdp_plugin,
log_with=log_with,
project_dir=tracking_dir,
gradient_accumulation_steps=gradient_accumulation_steps, fsdp_plugin=fsdp_plugin
)
epochs = config["Training"]["epochs"]
tracker_config = {
"epochs": epochs,
"learning_rate": config["Training"]["learning_rate"],
"batch_size": config["Training"]["batch_size"],
}
base_model = config["General"]["base_model"]
dataset_file = config["Dataset"]["train_file"]
accelerator.init_trackers("fine-tuning", config=tracker_config)

common.logger.info(
f"accelerator generate finish, accelerator device type = {accelerator.device}"
)
Expand All @@ -118,25 +92,23 @@ def train_func(config: Dict[str, Any]):

datasets = common.dataset.Dataset.registory.get("HuggingfaceDataset")()(
config={
"name": dataset_file,
"name": config["Dataset"]["train_file"],
"validation_file": config["Dataset"]["validation_file"],
"validation_split_percentage": config["Dataset"]["validation_split_percentage"],
}
)

tokenizer = common.tokenizer.Tokenizer.registory.get("HuggingFaceTokenizer")()(
config={
"name": base_model,
"name": config["General"]["base_model"],
"config": config["General"]["config"],
}
)

model = common.model.Model.registory.get("HuggingFaceModelForCausalLM")()(
config={
"name": base_model,
"dtype": convert_dtype(config["Training"]["mixed_precision"]),
"name": config["General"]["base_model"],
"config": config["General"]["config"],
"enable_gradient_checkpointing": config["General"]["enable_gradient_checkpointing"],
"lora_config": config["General"]["lora_config"]
if config["General"].get("lora_config")
else None,
Expand All @@ -153,10 +125,10 @@ def train_func(config: Dict[str, Any]):

trainer = common.trainer.Trainer.registory.get("DefaultTrainer")(
config={
"num_train_epochs": epochs,
"num_train_epochs": config["Training"]["epochs"],
"max_train_step": config["Training"].get("max_train_steps", None),
"logging_steps": config["Training"].get("logging_steps", 1),
"output": output_dir,
"log_step": 1,
"output": config["General"]["output_dir"],
"dataprocesser": {
"type": "GeneralProcesser",
"per_device_train_batch_size": config["Training"]["batch_size"],
Expand Down Expand Up @@ -245,21 +217,14 @@ def main(external_config=None):
"FI_PROVIDER": "tcp",
}
}

accelerate_env_vars = get_accelerate_environment_variable(accelerate_mode, config)
runtime_env["env_vars"].update(accelerate_env_vars)

if config["General"]["gpt_base_model"] is True:
runtime_env["pip"] = ["transformers==4.26.0"]

import intel_extension_for_pytorch as ipex

if "xpu" in ipex.__version__:
num_cpus = (
resources_per_worker["CPU"] * num_training_workers + 1
) # additional 1 for head worker
ray.init(num_cpus=num_cpus, runtime_env=runtime_env)
else:
ray.init(runtime_env=runtime_env)
ray.init(runtime_env=runtime_env)

common.logger.info(f"ray available resources = {ray.available_resources()}")

Expand Down
4 changes: 0 additions & 4 deletions finetune/finetune.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ General:
gpt_base_model: true
output_dir: /tmp/llm-ray/output
checkpoint_dir: /tmp/llm-ray/checkpoint
tracking_dir: /tmp/llm-ray/tracking
config:
trust_remote_code: false
use_auth_token: null
Expand All @@ -12,7 +11,6 @@ General:
r: 8
lora_alpha: 32
lora_dropout: 0.1
enable_gradient_checkpointing: false
Dataset:
train_file: examples/data/sample_finetune_data_small.jsonl
validation_file: null
Expand All @@ -30,5 +28,3 @@ Training:
resources_per_worker:
CPU: 32
accelerate_mode: CPU_DDP
gradient_accumulation_steps: 2
logging_steps: 10
9 changes: 0 additions & 9 deletions finetune/finetune_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,9 @@ class General(BaseModel):
gpt_base_model: bool
output_dir: str
checkpoint_dir: str
tracking_dir: str
config: GeneralConfig
lora_config: Optional[LoraConfig] = None
deltatuner_config: Optional[DeltatunerConfig] = None
enable_gradient_checkpointing: bool = False


class Dataset(BaseModel):
Expand All @@ -56,8 +54,6 @@ class Training(BaseModel):
resources_per_worker: RayResourceConfig
accelerate_mode: str
mixed_precision: str = "no"
gradient_accumulation_steps: int
logging_steps: int = 10

@validator("device")
def check_device(cls, v: str):
Expand All @@ -73,11 +69,6 @@ def check_accelerate_mode(cls, v: str):
raise ValueError(f"accelerate_mode must be one of {modes}")
return v

@validator("logging_steps")
def check_logging_steps(cls, v: int):
assert v > 0
return v

# @model_validator(mode='after')
# def check_device_and_accelerate_mode(self) -> "Training":
# dev = self.device
Expand Down
12 changes: 6 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ dependencies = [
"peft>=0.4.0",
"deltatuner==1.1.9",
"py-cpuinfo",
"pydantic-yaml"
"pydantic-yaml",
]

[project.optional-dependencies]
Expand All @@ -48,11 +48,11 @@ cpu = [

gpu = [
"transformers>=4.35.0",
"torch==2.1.0a0",
"torchvision==0.16.0a0",
"intel_extension_for_pytorch==2.1.10+xpu",
"oneccl_bind_pt==2.1.100+xpu",
"dpctl==0.15.0"
"torch==2.0.1a0",
"torchvision==0.15.2a0",
"intel-extension-for-pytorch==2.0.110+xpu",
"oneccl_bind_pt==2.0.100+gpu",
"dpctl==0.14.5"
]

deepspeed = [
Expand Down

0 comments on commit 63464ed

Please sign in to comment.