Commit 88903de0 authored by Julien Breton

creating graph & fine-tuned Mixtral

parent 092057cf
@@ -2,16 +2,20 @@ import json
import numpy as np
import matplotlib.pyplot as plt
def graph_system_evolution_eval(ref_path, new_path, title, output_path):
bars = [[], [], []]
for i in range(len(ref_path)):
def graph_system_evolution_eval(config, title, output_path):
bars = []
for i in range(len(config)):
bars.append([])
for i in range(len(config)):
sum_perfect_equals = 0
sum_subpart = 0
sum_miss_classification = 0
sum_hallucination = 0
with open(new_path[i]) as file:
with open(config[i]['new']) as file:
data = json.load(file)
data = data["break_down"]
for tag, values in data.items():
@@ -20,7 +24,7 @@ def graph_system_evolution_eval(ref_path, new_path, title, output_path):
sum_miss_classification += values['miss_classification']
sum_hallucination += values['hallucination']
with open(ref_path[i]) as file:
with open(config[i]['ref']) as file:
data = json.load(file)
data = data["break_down"]
for tag, values in data.items():
@@ -34,40 +38,39 @@ def graph_system_evolution_eval(ref_path, new_path, title, output_path):
bars[i].append(sum_miss_classification)
bars[i].append(sum_hallucination)
# set width of bars
barWidth = 0.25
r_list = [np.arange(len(bars[0]))]
# Set position of bar on X axis
r1 = np.arange(len(bars[0]))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
for i in range(1, len(config)):
r_list.append([x + barWidth for x in r_list[i-1]])
# Make the plot
plt.bar(r1, bars[0], color='#75ac9d', width=barWidth, edgecolor='white', label='GPT-4')
plt.bar(r2, bars[1], color='#fca301', width=barWidth, edgecolor='white', label='Mixtral-8x7b')
plt.bar(r3, bars[2], color='#5619d8', width=barWidth, edgecolor='white', label='Mistral-7b')
for i in range(len(config)):
plt.bar(r_list[i], bars[i], color=config[i]['color'], width=barWidth, edgecolor='white', label=config[i]['title'])
# Add xticks in the middle of the grouped bars
plt.ylabel('Number of predicates')
plt.xlabel(title, fontweight='bold')
plt.xticks([r + barWidth for r in range(len(bars[0]))], ['Perfect equals', 'Subpart', 'Miss classification', 'Others'])
plt.xticks([r + barWidth for r in range(len(bars[0]))],
['Perfect equals', 'Subpart', 'Miss classification', 'Others'])
# Create legend & Show graphic
plt.legend()
plt.savefig(output_path)
bars1_path_ref = "../../results/LLM/GPT-4/GPT-4_zero_shot_results.json"
bars2_path_ref = "../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json"
bars3_path_ref = "../../results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json"
bars1_path_new = "../../results/LLM/GPT-4/GPT-4_few_shot_results.json"
bars2_path_new = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json"
bars3_path_new = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_results.json"
config = [
{
'ref': '../../results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json',
'new': '../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json',
'title': 'Mistral-7b',
'color': '#5619d8'
}
]
title = "Evolution from Zero-shot to Few-shot predicate extraction"
output_path = "../../results/LLM/zero_shot_to_few_shot_evolution.png"
title = "Evolution from Zero-shot to Fine-tuning predicate extraction with Mistral"
output_path = "../../results/LLM/few_shot_to_fine_tuning_mistral_evolution.png"
paths_ref = [bars1_path_ref, bars2_path_ref, bars3_path_ref]
paths_new = [bars1_path_new, bars2_path_new, bars3_path_new]
graph_system_evolution_eval(paths_ref, paths_new, title, output_path)
\ No newline at end of file
graph_system_evolution_eval(config, title, output_path)
\ No newline at end of file
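For reference, the three-model comparison produced by the removed hard-coded calls can be reproduced through the new config-driven interface; every path, label and colour below is taken from the lines removed in this commit:

config = [
    {
        'ref': '../../results/LLM/GPT-4/GPT-4_zero_shot_results.json',
        'new': '../../results/LLM/GPT-4/GPT-4_few_shot_results.json',
        'title': 'GPT-4',
        'color': '#75ac9d'
    },
    {
        'ref': '../../results/LLM/Mixtral-8x7b/MIXTRAL_zero_shot_results.json',
        'new': '../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json',
        'title': 'Mixtral-8x7b',
        'color': '#fca301'
    },
    {
        'ref': '../../results/LLM/Mistral-7b/MISTRAL_zero_shot_results.json',
        'new': '../../results/LLM/Mistral-7b/MISTRAL_few_shot_results.json',
        'title': 'Mistral-7b',
        'color': '#5619d8'
    }
]
graph_system_evolution_eval(config,
                            "Evolution from Zero-shot to Few-shot predicate extraction",
                            "../../results/LLM/zero_shot_to_few_shot_evolution.png")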
@@ -21,8 +21,11 @@ spec.loader.exec_module(recall_precision_f1_f2)
#data_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_cleaned.json"
#result_path="../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_results.json"
data_path = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_cleaned.json"
result_path="../../results/LLM/Mistral-7b/MISTRAL_few_shot_results.json"
#data_path = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_cleaned.json"
#result_path="../../results/LLM/Mistral-7b/MISTRAL_few_shot_results.json"
data_path = "../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_cleaned.json"
result_path="../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_results.json"
with open('../../data/evalQS.json', 'r') as fichier:
......
@@ -9,8 +9,11 @@ import json, re
#input_path = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_raw_answers.json"
#output_path = "../../results/LLM/Mistral-7b/MISTRAL_few_shot_cleaned.json"
input_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_raw_answers.json"
output_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_cleaned.json"
#input_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_raw_answers.json"
#output_path = "../../results/LLM/Mixtral-8x7b/MIXTRAL_few_shot_cleaned.json"
input_path = "../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_raw_answers.json"
output_path = "../../results/LLM/Mistral-7b/MISTRAL_fine_tuned_cleaned.json"
with open(input_path) as file:
data = json.load(file)
......
@@ -3,72 +3,145 @@ import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
import transformers, json
from alive_progress import alive_bar
import bitsandbytes, flash_attn
spec = importlib.util.spec_from_file_location("utils", "../utils.py")
utils = importlib.util.module_from_spec(spec)
spec.loader.exec_module(utils)
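# dynamically load ../utils.py (not on sys.path); it provides the prompt helpers
# such as get_pre_prompt_zero_shot() used during generation below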
tokenizer = AutoTokenizer.from_pretrained("../../../models/Mixtral-8x7B-Instruct-v0.1")
model = AutoModelForCausalLM.from_pretrained("../../../models/Mixtral-8x7B-Instruct-v0.1",
load_in_4bit=True,
torch_dtype=torch.float16,
device_map="auto",
)
def fine_tuned(base_model, new_model):
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model,
load_in_4bit=True,
torch_dtype=torch.float16,
device_map="auto",
)
# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)
# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)
tokenizer.pad_token = "!"  # placeholder pad token, deliberately not EOS
tokenizer.pad_token = "!"  # placeholder pad token, deliberately not EOS
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1
config = LoraConfig(
r=LORA_R,
lora_alpha=LORA_ALPHA,
target_modules=[ "w1", "w2", "w3"], #just targetting the MoE layers.
lora_dropout=LORA_DROPOUT,
bias="none",
task_type="CAUSAL_LM"
)
config = LoraConfig(
r=LORA_R,
lora_alpha=LORA_ALPHA,
target_modules=[ "w1", "w2", "w3"], #just targetting the MoE layers.
lora_dropout=LORA_DROPOUT,
bias="none",
task_type="CAUSAL_LM"
)
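# w1/w2/w3 are the per-expert feed-forward projections in Mixtral's MoE blocks,
# so LoRA adapters (r=8, alpha=16, dropout=0.1) are attached only to the expert
# MLPs while the attention projections keep their frozen base weights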
model = get_peft_model(model, config)
dataset = load_dataset('csv', data_files='../../../data/finetuned_dataset.csv')
train_data = dataset["train"] # Not using evaluation data
def generate_prompt(user_query):
p = f"{user_query['input']}{user_query['output']}</s>"
return p
def tokenize(prompt):
return tokenizer(
prompt + tokenizer.eos_token,
truncation=True,
max_length=None,
padding="max_length"
)
model = get_peft_model(model, config)
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["input" , "output"])
dataset = load_dataset('csv', data_files='../../../data/finetuned_dataset.csv')
trainer = Trainer(
model=model,
train_dataset=train_data,
args=TrainingArguments(
per_device_train_batch_size=1,
gradient_accumulation_steps=4,
num_train_epochs=6,
learning_rate=1e-4,
logging_steps=2,
optim="adamw_torch",
save_strategy="epoch",
output_dir="./results"
),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
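# effective batch size: per_device_train_batch_size (1) * gradient_accumulation_steps (4) = 4;
# a checkpoint is written to ./results at the end of every epoch (save_strategy="epoch")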
model.config.use_cache = False
trainer.train()
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
model.eval()
train_data = dataset["train"] # Not using evaluation data
def generate(base_model, new_model):
def generate_prompt(user_query):
p = f"{user_query['input']}{user_query['output']}</s>"
return p
model = transformers.AutoModelForCausalLM.from_pretrained(
new_model,
#torch_dtype=torch.float16,
device_map="auto",
load_in_8bit=False,
load_in_4bit=True,
#attn_implementation="flash_attention_2"
)
model.eval()
tokenizer = transformers.AutoTokenizer.from_pretrained(new_model)
def tokenize(prompt):
return tokenizer(
prompt + tokenizer.eos_token,
truncation=True,
max_length=None,
padding="max_length"
generate_text = transformers.pipeline(
model=model, tokenizer=tokenizer,
return_full_text=False, # if using langchain set True
task="text-generation",
# we pass model parameters here too
do_sample=True,
temperature=0.5, # 'randomness' of outputs, 0.0 is the min and 1.0 the max
top_p=0.15, # select from top tokens whose probability add up to 15%
top_k=0, # select from top 0 tokens (because zero, relies on top_p)
max_new_tokens=2048, # max number of tokens to generate in the output
repetition_penalty=1.0 # if output begins repeating increase
)
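# generate_text(prompts) returns one list per prompt, each holding a dict whose
# "generated_text" field contains only the completion (return_full_text=False);
# the loop further down reads it as res[i][0]["generated_text"]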
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["input" , "output"])
trainer = Trainer(
model=model,
train_dataset=train_data,
args=TrainingArguments(
per_device_train_batch_size=1,
gradient_accumulation_steps=4,
num_train_epochs=6,
learning_rate=1e-4,
logging_steps=2,
optim="adamw_torch",
save_strategy="epoch",
output_dir="./results"
),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
# Save the fine-tuned model
trainer.model.save_pretrained('../../../models/Fine-tuned_Mixtral-8x7b')
model.eval()
\ No newline at end of file
def instruction_format(sys_message: str, query: str):
# note: don't append "</s>" at the end
return f'<s> [INST] {sys_message} [/INST]\nUser: {query}\nAssistant: '
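# e.g. instruction_format("You are an annotator...", "Une phrase à annoter") returns
# '<s> [INST] You are an annotator... [/INST]\nUser: Une phrase à annoter\nAssistant: '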
with open('../../../data/evalQS.json', 'r', encoding='utf-8') as file:
loaded = json.load(file)
input = []
output = {}
with alive_bar(len(loaded)) as bar:
for sentence in loaded:
input.append(instruction_format(utils.get_pre_prompt_zero_shot(), sentence))
bar()
print("Input creation finished")
res = generate_text(input)
i = 0
for sentence in loaded:
output[sentence] = res[i][0]["generated_text"]
i += 1
with open('../../../results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_raw_answers.json', 'w', encoding='utf-8') as file:
json.dump(output, file) # in 44:36.6 (0.08/s)
print("========== Program finished ==========")
#######################################################################################################################
base_model = "../../../models/Mixtral-8x7B-Instruct-v0.1"
new_model = "../../../models/Fine-tuned_Mixtral-8x7b"
fine_tuned(base_model, new_model)
#generate(base_model, new_model)
\ No newline at end of file
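PeftModel is imported at the top of this script but the generate() path loads new_model directly. As a minimal sketch (not part of this commit), the saved LoRA adapter could instead be merged back into the base weights before inference; the "_merged" output directory below is hypothetical:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = "../../../models/Mixtral-8x7B-Instruct-v0.1"
new_model = "../../../models/Fine-tuned_Mixtral-8x7b"

# reload the frozen base weights in fp16 so the adapter can be merged cleanly
base = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16, device_map="auto")
# attach the adapter saved by fine_tuned() and fold it into the base weights
merged = PeftModel.from_pretrained(base, new_model).merge_and_unload()
merged.save_pretrained(new_model + "_merged")  # hypothetical directory
AutoTokenizer.from_pretrained(base_model).save_pretrained(new_model + "_merged")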
This diff is collapsed.
{"global": {"recall": 0.06281833616298811, "precision": 0.10834553440702782, "f1": 0.07952713594841483, "f2": 0.0685820203892493}, "break_down": {"action": {"perfect_equals": 0, "subpart": 138, "miss_classification": 0, "hallucination": 8}, "actor": {"perfect_equals": 24, "subpart": 56, "miss_classification": 4, "hallucination": 16}, "artifact": {"perfect_equals": 14, "subpart": 83, "miss_classification": 0, "hallucination": 8}, "condition": {"perfect_equals": 9, "subpart": 199, "miss_classification": 2, "hallucination": 41}, "definition": {"perfect_equals": 0, "subpart": 0, "miss_classification": 1, "hallucination": 3}, "location": {"perfect_equals": 0, "subpart": 8, "miss_classification": 0, "hallucination": 5}, "modality": {"perfect_equals": 13, "subpart": 38, "miss_classification": 2, "hallucination": 24}, "reference": {"perfect_equals": 2, "subpart": 44, "miss_classification": 4, "hallucination": 33}, "time": {"perfect_equals": 12, "subpart": 43, "miss_classification": 6, "hallucination": 41}}}
\ No newline at end of file
results/LLM/few_shot_to_fine_tuning_mistral_evolution.png (new image, 22.3 KiB)