Commit 092057cf authored by Julien Breton

refactor project

parent ecbd50fe
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, torch, platform, warnings
from datasets import Dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
import pandas as pd
base_model = "../../../models/Mistral-7B-Instruct-v0.2"
path = '../../../data/finetuned_dataset.csv'
dataframe = pd.read_csv(path)
# Strip stray "<s> " BOS markers from the CSV, then build the full training text
# by concatenating each prompt with its expected completion.
dataframe = dataframe.replace('<s> ', '', regex=True)
dataframe['concat'] = dataframe['input'].astype(str) + dataframe['output'].astype(str)
dataset = Dataset.from_pandas(dataframe, split="train")
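# Optional sanity check (not part of the original script): print the start of one
# concatenated training example to confirm the prompt/completion join looks right;
# it only relies on the "concat" column built above.
print(dataset[0]['concat'][:200])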
# Load base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map={"": 0}
)
model.config.use_cache = False  # silence gradient-checkpointing warnings; re-enabled for inference below
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
print(tokenizer.add_bos_token, tokenizer.add_eos_token)  # sanity-check the special-token flags
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"]
)
model = get_peft_model(model, peft_config)
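# Optional sanity check (not part of the original script): report how many
# parameters the LoRA adapters actually train on top of the frozen 4-bit base.
model.print_trainable_parameters()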
# Training arguments
# Hyperparameters should be adjusted based on the hardware you are using.
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    optim="paged_adamw_8bit",
    save_steps=5000,
    logging_steps=30,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.3,
    group_by_length=True,
    lr_scheduler_type="constant",
)
# Set up the SFT trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length=None,
    dataset_text_field="concat",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)
trainer.train()
# Save the fine-tuned model
trainer.model.save_pretrained('../../../models/Fine-tuned_Mistral-7B')
model.config.use_cache = True
model.eval()
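# Minimal inference sketch (not part of the original script), reusing the trained
# PEFT model held in memory; the [INST] prompt format and the placeholder question
# are assumptions, not taken from the original code.
tokenizer.add_eos_token = False  # don't append EOS to the prompt at inference time
streamer = TextStreamer(tokenizer)
prompt = "[INST] Your question here [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    _ = model.generate(**inputs, max_new_tokens=128, streamer=streamer)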
# ---------------------------------------------------------------------------
# Mixtral-8x7B-Instruct-v0.1 fine-tuning
# ---------------------------------------------------------------------------
import torch
import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
tokenizer = AutoTokenizer.from_pretrained("../../../models/Mixtral-8x7B-Instruct-v0.1")
model = AutoModelForCausalLM.from_pretrained(
    "../../../models/Mixtral-8x7B-Instruct-v0.1",
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)
tokenizer.pad_token = "!"  # deliberately not the EOS token, so padding stays distinct from end-of-sequence
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["w1", "w2", "w3"],  # only targeting the MoE expert MLP projections
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
dataset = load_dataset('csv', data_files='../../../data/finetuned_dataset.csv')
train_data = dataset["train"] # Not using evaluation data
def generate_prompt(user_query):
    # Concatenate prompt and completion into a single training string.
    p = f"{user_query['input']}{user_query['output']}</s>"
    return p

def tokenize(prompt):
    # Note: with max_length=None, padding="max_length" pads every example up to
    # tokenizer.model_max_length, and the EOS appended here duplicates the "</s>"
    # already added by generate_prompt.
    return tokenizer(
        prompt + tokenizer.eos_token,
        truncation=True,
        max_length=None,
        padding="max_length"
    )
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["input", "output"])
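# Optional sanity check (not part of the original script): decode the start of one
# tokenized example to confirm the text and special tokens look as expected.
print(tokenizer.decode(train_data[0]["input_ids"][:64]))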
trainer = Trainer(
    model=model,
    train_dataset=train_data,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=6,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="epoch",
        output_dir="./results"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
# Save the fine-tuned model
trainer.model.save_pretrained('../../../models/Fine-tuned_Mixtral-8x7b')
model.eval()
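# Optional follow-up (not part of the original script): a minimal sketch for merging
# the LoRA weights back into the base model for standalone use. It assumes enough
# memory to reload Mixtral in fp16, and the merged output path is a placeholder.
base = AutoModelForCausalLM.from_pretrained(
    "../../../models/Mixtral-8x7B-Instruct-v0.1",
    torch_dtype=torch.float16,
    device_map="auto",
)
merged = PeftModel.from_pretrained(base, '../../../models/Fine-tuned_Mixtral-8x7b')
merged = merged.merge_and_unload()
merged.save_pretrained('../../../models/Fine-tuned_Mixtral-8x7b-merged')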