Commit a5878300 authored by Julien Breton

add Miqu

parent 67a8de73
import importlib
import json
import sys
from alive_progress import alive_bar
import torch
import transformers
import bitsandbytes, flash_attn
import time
# Record the start time
start_time = time.time()
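# Load the shared prompt helpers (utils.py) from the parent directory by file path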
spec = importlib.util.spec_from_file_location("utils", "../utils.py")
utils = importlib.util.module_from_spec(spec)
spec.loader.exec_module(utils)
model_id = "../../../models/Miqu-1-70b"
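# Load Miqu-1-70b quantized to 4-bit, with FlashAttention-2 for the attention layers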
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    load_in_8bit=False,
    load_in_4bit=True,
    attn_implementation="flash_attention_2"
)
model.eval()
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
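# Text-generation pipeline around the quantized model; generation parameters are fixed once here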
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=False,  # if using LangChain, set True
    task="text-generation",
    # we pass generation parameters here too
    do_sample=True,
    temperature=0.5,  # 'randomness' of outputs; lower values are more deterministic
    top_p=0.15,  # sample from the top tokens whose cumulative probability adds up to 15%
    top_k=0,  # 0 disables top-k, so sampling relies on top_p alone
    max_new_tokens=2048,  # max number of tokens to generate in the output
    repetition_penalty=1.0  # increase if the output begins repeating
)
def instruction_format(sys_message: str, query: str):
    # note: don't append "</s>" at the end
    return f'<s> [INST] {sys_message} [/INST]\nUser: {query}\nAssistant: '
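# The template above yields prompts of the form (placeholders, not actual data):
#   '<s> [INST] <system message> [/INST]\nUser: <sentence>\nAssistant: '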
with open('../../../data/evalQS.json', 'r', encoding='utf-8') as file:
    loaded = json.load(file)
input = []
output = {}
with alive_bar(len(loaded)) as bar:
    for sentence in loaded:
        input.append(instruction_format(utils.get_pre_prompt_few_shot(), sentence))
        bar()
print("Input creation finished")
res = generate_text(input)
i = 0
for sentence in loaded:
    output[sentence] = res[i][0]["generated_text"]
    i += 1
with open('../../../results/LLM/Miqu-1-70b/MIQU_few_shot_raw_answers.json', 'w', encoding='utf-8') as file:
    json.dump(output, file)
print("========== Program finished ==========")
# Record the end time
end_time = time.time()
# Calculate the execution time
execution_time = end_time - start_time
# Calculate hours, minutes, and seconds
hours = execution_time // 3600
minutes = (execution_time % 3600) // 60
seconds = execution_time % 60
# Print the execution time in hours, minutes, and seconds
print(f"The program ran for {int(hours)} hours, {int(minutes)} minutes, and {seconds:.2f} seconds.")
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, torch, platform, warnings, json
from datasets import Dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
import pandas as pd
import importlib
import transformers
from alive_progress import alive_bar
import time
# Record the start time
start_time = time.time()
spec = importlib.util.spec_from_file_location("utils", "../utils.py")
utils = importlib.util.module_from_spec(spec)
spec.loader.exec_module(utils)
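# Fine-tune the base model with QLoRA: 4-bit NF4 quantization plus LoRA adapters trained with SFTTrainer
# on the prompt/completion pairs, then save the adapters under `new_model`.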
def fine_tune(base_model, new_model):
    path = '../../../data/finetuned_dataset.csv'
    dataframe = pd.read_csv(path)
    dataframe = dataframe.replace('<s> ', '', regex=True)
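    # Training text = the prompt ('input' column) immediately followed by the expected completion ('output' column)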
    dataframe['concat'] = dataframe['input'].astype(str) + dataframe['output'].astype(str)
    dataset = Dataset.from_pandas(dataframe, split="train")
    # Load base model
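    # QLoRA-style quantization: 4-bit NF4 weights with bfloat16 compute, no double quantization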
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map={"": 0}
    )
    model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
    model.config.pretraining_tp = 1
    model.gradient_checkpointing_enable()
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.add_eos_token = True
    model = prepare_model_for_kbit_training(model)
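    # LoRA adapters on every attention and MLP projection matrix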
    peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    )
    model = get_peft_model(model, peft_config)
    # Training arguments
    # Hyperparameters should be adjusted based on the hardware you are using
    training_arguments = TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        num_train_epochs=6,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="steps",
        output_dir="./results"
    )
    # Setting SFT parameters
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=peft_config,
        max_seq_length=None,
        dataset_text_field="concat",
        tokenizer=tokenizer,
        args=training_arguments,
        packing=False,
    )
    trainer.train()
    # Save the fine-tuned model
    trainer.model.save_pretrained(new_model)
    model.config.use_cache = True
    model.eval()
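
# Reload the quantized base model, attach the trained LoRA adapters, merge them into the weights,
# and run inference over the evaluation sentences with the merged model.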
def generate(base_model, new_model):
    base_model_reload = transformers.AutoModelForCausalLM.from_pretrained(
        base_model,
        device_map="auto",
        load_in_8bit=False,
        load_in_4bit=True,
        attn_implementation="flash_attention_2"
    )
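    # Merge the LoRA adapters into the base weights so the pipeline runs on a plain model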
    model = PeftModel.from_pretrained(base_model_reload, new_model)
    model = model.merge_and_unload()
    tokenizer = transformers.AutoTokenizer.from_pretrained(base_model)
    generate_text = transformers.pipeline(
        model=model, tokenizer=tokenizer,
        return_full_text=False,  # if using LangChain, set True
        task="text-generation",
        # we pass generation parameters here too
        do_sample=True,
        temperature=0.5,  # 'randomness' of outputs; lower values are more deterministic
        top_p=0.15,  # sample from the top tokens whose cumulative probability adds up to 15%
        top_k=0,  # 0 disables top-k, so sampling relies on top_p alone
        max_new_tokens=2048,  # max number of tokens to generate in the output
        repetition_penalty=1.0  # increase if the output begins repeating
    )
    def instruction_format(sys_message: str, query: str):
        # note: don't append "</s>" at the end
        return f'<s> [INST] {sys_message} [/INST]\nUser: {query}\nAssistant: '
    with open('../../../data/evalQS.json', 'r', encoding='utf-8') as file:
        loaded = json.load(file)
    input = []
    output = {}
    with alive_bar(len(loaded)) as bar:
        for sentence in loaded:
            input.append(instruction_format(utils.get_pre_prompt_zero_shot(), sentence))
            bar()
    print("Input creation finished")
    res = generate_text(input)
    i = 0
    for sentence in loaded:
        output[sentence] = res[i][0]["generated_text"]
        i += 1
    with open('../../../results/LLM/Miqu-1-70b/MIQU_fine_tuned_raw_answers.json', 'w', encoding='utf-8') as file:
        json.dump(output, file)
#######################################################################################################################
base_model = "../../../models/Miqu-1-70b"
new_model = "../../../models/Fine-tuned_Miqu-1-70b"
fine_tune(base_model, new_model)
generate(base_model, new_model)
print("========== Program finished ==========")
# Record the end time
end_time = time.time()
# Calculate the execution time
execution_time = end_time - start_time
# Calculate hours, minutes, and seconds
hours = execution_time // 3600
minutes = (execution_time % 3600) // 60
seconds = execution_time % 60
# Print the execution time in hours, minutes, and seconds
print(f"The program ran for {int(hours)} hours, {int(minutes)} minutes, and {seconds:.2f} seconds.")
import importlib
import json
import sys
from alive_progress import alive_bar
import torch
import transformers
import bitsandbytes, flash_attn
import time
# Record the start time
start_time = time.time()
spec = importlib.util.spec_from_file_location("utils", "../utils.py")
utils = importlib.util.module_from_spec(spec)
spec.loader.exec_module(utils)
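# Zero-shot variant: same setup as the few-shot script, but prompts are built with utils.get_pre_prompt_zero_shot()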
model_id = "../../../models/Miqu-1-70b"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    load_in_8bit=False,
    load_in_4bit=True,
    attn_implementation="flash_attention_2"
)
model.eval()
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=False,  # if using LangChain, set True
    task="text-generation",
    # we pass generation parameters here too
    do_sample=True,
    temperature=0.5,  # 'randomness' of outputs; lower values are more deterministic
    top_p=0.15,  # sample from the top tokens whose cumulative probability adds up to 15%
    top_k=0,  # 0 disables top-k, so sampling relies on top_p alone
    max_new_tokens=2048,  # max number of tokens to generate in the output
    repetition_penalty=1.0  # increase if the output begins repeating
)
def instruction_format(sys_message: str, query: str):
    # note: don't append "</s>" at the end
    return f'<s> [INST] {sys_message} [/INST]\nUser: {query}\nAssistant: '
with open('../../../data/evalQS.json', 'r', encoding='utf-8') as file:
    loaded = json.load(file)
input = []
output = {}
with alive_bar(len(loaded)) as bar:
    for sentence in loaded:
        input.append(instruction_format(utils.get_pre_prompt_zero_shot(), sentence))
        bar()
print("Input creation finished")
res = generate_text(input)
i = 0
for sentence in loaded:
    output[sentence] = res[i][0]["generated_text"]
    i += 1
with open('../../../results/LLM/Miqu-1-70b/MIQU_zero_shot_raw_answers.json', 'w', encoding='utf-8') as file:
    json.dump(output, file)
print("========== Program finished ==========")
# Record the end time
end_time = time.time()
# Calculate the execution time
execution_time = end_time - start_time
# Calculate hours, minutes, and seconds
hours = execution_time // 3600
minutes = (execution_time % 3600) // 60
seconds = execution_time % 60
# Print the execution time in hours, minutes, and seconds
print(f"The program ran for {int(hours)} hours, {int(minutes)} minutes, and {seconds:.2f} seconds.")