New hyperparameters and results

c1653711 · jbreton · 435cf81a · c1653711 · c1653711
Commit c1653711 authored 1 year ago by jbreton
--- a/modules/llm/Mixtral-8x7b/Mixtral-8x7b_fine_tune.py
+++ b/modules/llm/Mixtral-8x7b/Mixtral-8x7b_fine_tune.py
@@ -71,7 +71,8 @@ def fine_tuned(base_model, new_model):
        train_dataset=train_data,
        args=TrainingArguments(
            per_device_train_batch_size=2,
-            gradient_accumulation_steps=1,
+            gradient_accumulation_steps=2,
+            num_train_epochs=6,
            learning_rate=1e-4,
            logging_steps=2,
            optim="adamw_torch",

--- a/results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_raw_answers.json
+++ b/results/LLM/Mixtral-8x7b/MIXTRAL_fine_tuned_raw_answers.json