Step 1. Import the required libraries:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
import transformers
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
)
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    set_seed,
    pipeline,
    TrainingArguments,
)
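As an optional sanity check before loading anything, you can print the library versions actually in use and confirm a CUDA GPU is visible (a minimal sketch; 4-bit loading with bitsandbytes requires a CUDA-capable GPU):
# Optional sanity check: library versions and GPU availability
import peft
print(torch.__version__, transformers.__version__, bnb.__version__, peft.__version__)
print(torch.cuda.is_available())  # must be True for 4-bit (bitsandbytes) loading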
Step 2. Load the model, then run prepare_model_for_kbit_training(model):
# GPUs to use and a per-device memory cap for device_map="auto"
devices = [0]  # [0, 3]
max_memory = {i: '49140MiB' for i in devices}
model_name = 'OpenGVLab/InternVL2-8B'
# Load the model in 4-bit NF4 with double quantization (QLoRA-style), computing in bfloat16
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir='/data/huggingface_models',
    trust_remote_code=True,
    device_map="auto",
    max_memory=max_memory,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4'
    ),
)
# Print the model structure
print(model)
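Optionally, it can help to confirm where device_map="auto" placed each submodule and roughly how much memory the 4-bit weights take (a sketch using standard transformers attributes):
# Where each submodule ended up, and the approximate memory footprint of the 4-bit weights
print(model.hf_device_map)
print(f"{model.get_memory_footprint() / 1024**3:.2f} GiB")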
# Attach a get_input_embeddings method to the model
def get_input_embeddings(self):
    if hasattr(self, 'embed_tokens'):
        return self.embed_tokens
    elif hasattr(self, 'language_model') and hasattr(self.language_model.model, 'tok_embeddings'):
        return self.language_model.model.tok_embeddings
    else:
        raise NotImplementedError("The model does not have an attribute 'embed_tokens' or 'language_model.model.tok_embeddings'.")
model.get_input_embeddings = get_input_embeddings.__get__(model, type(model))
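A quick check that the patched method resolves to the token embedding (a sketch; for InternVL2-8B the expected result is an nn.Embedding over the LLM vocabulary):
# Sanity check: the patched accessor should return the token embedding module
emb = model.get_input_embeddings()
print(type(emb), tuple(emb.weight.shape))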
# Implement the prepare_model_for_kbit_training function directly
def prepare_model_for_kbit_training(model):
    for param in model.parameters():
        param.requires_grad = False  # disable gradient computation for every parameter
    # Re-enable gradients only for the token embedding layer
    if hasattr(model, 'language_model') and hasattr(model.language_model.model, 'tok_embeddings'):
        for param in model.language_model.model.tok_embeddings.parameters():
            param.requires_grad = True
    elif hasattr(model, 'model') and hasattr(model.model, 'tok_embeddings'):
        for param in model.model.tok_embeddings.parameters():
            param.requires_grad = True
    elif hasattr(model, 'embed_tokens'):
        for param in model.embed_tokens.parameters():
            param.requires_grad = True
    # Other specific layers can be unfrozen here if needed, e.g.:
    # if hasattr(model, 'some_other_layer'):
    #     for param in model.some_other_layer.parameters():
    #         param.requires_grad = True
    return model
model = prepare_model_for_kbit_training(model)
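To verify that the preparation step froze everything except the embedding layer, count the trainable parameters (a minimal sketch):
# Only the token embedding parameters should still require gradients here
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / total: {total:,} ({100 * trainable / total:.2f}%)")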
Step 3. Select the layers to attach QLoRA to:
def find_all_linear_names(model, train_mode):
    assert train_mode in ['lora', 'qlora']
    cls = bnb.nn.Linear4bit if train_mode == 'qlora' else nn.Linear
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # skip the LM head; we don't want to attach LoRA there
        lora_module_names.remove('lm_head')
    return list(lora_module_names)
config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=find_all_linear_names(model, 'qlora'),
    lora_dropout=0.05,
    bias="none",
    task_type="QUESTION_ANS"  # or CAUSAL_LM, FEATURE_EXTRACTION, SEQ_2_SEQ_LM, SEQ_CLS, TOKEN_CLS
)
print(sorted(config.target_modules))  # ['1', 'output', 'w1', 'w2', 'w3', 'wo', 'wqkv']
config.target_modules.remove('1')  # remove it, since we don't want the LLM head part
model = get_peft_model(model, config)
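After wrapping with get_peft_model, the PEFT model exposes print_trainable_parameters(), which confirms that only the LoRA adapter weights (plus whatever was unfrozen above) are trainable:
# Summary of trainable vs. total parameters after attaching the LoRA adapters
model.print_trainable_parameters()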
After this, proceed with training via a Trainer; a minimal sketch is shown below.
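This is only a reference sketch: train_dataset and data_collator are placeholders that are not defined in this post, and the hyperparameters are assumptions to adapt to your data and GPU budget.
# Sketch only -- train_dataset / data_collator are hypothetical placeholders
training_args = TrainingArguments(
    output_dir='./internvl2-qlora',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=1,
    bf16=True,
    logging_steps=10,
    save_strategy='epoch',
)
trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,  # placeholder: your preprocessed dataset
    data_collator=data_collator,  # placeholder: e.g. a causal-LM collator
)
trainer.train()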
Result after attaching QLoRA: