fresh commit

ddd2af48 · emetheni · 3eb3386d · ddd2af48 · ddd2af48 · ddd2af48
Commit ddd2af48 authored 1 year ago by emetheni
--- a/README.md
+++ b/README.md
-# DiscReT: Discourse Relation tagging
-The MELODI team submission for Task 3. 
+# New COLING version

-## Contents
-* **mappings**: a folder with the label conversions we implemented, and specifications on which test results are created from which of our models.
-* **pytorch_classifier.py**: the bare classifier using mBERT-base-cased and built on Pytorch
-* **make_adapter.py**: code to create a classifier adapter, based on [AdapterHub](https://github.com/adapter-hub/adapter-transformers)
-* **adapter_classifier.py**: classifier using one of the trained adapters (training the adapter beforehand is required)
-* **requirements.txt**: list of dependencies
-* **train_classifiers.sh**: shell script to train all classifiers
-* **configure.py**: list of training arguments
-* **utils.py**: various functions
-
-## Installation
-* Pull data from the [DISRPT Shared Task repository](https://github.com/disrpt/sharedtask2023): 
-	```
-	git clone https://github.com/disrpt/sharedtask2023
-	```
-
-* Install requirements, either:
-	```
-	pip install -r requirements.txt
-	```
-	or by making a conda environment:
-	``` 
-	conda env create -f environment.yml
-	conda activate discret 
-	```
-
-## Running classifiers
-
-The results are created by three different models:
-* the **bare classifier**: an mBERT-base-cased model  (max. 6 epochs)
-* the **classifier with A1 adapter**: an mBERT-base-cased model trained for 3 epochs with an adapter trained with mBERT-base-cased, for 15 epochs, with frozen layer 1
-* the **classifier with A1-3 adapter**: an mBERT-base-cased model trained for 4 epochs with an adapter trained with mBERT-base-cased, for 15 epochs, with frozen layers 1-3
-
-Run either the **train_classifiers.sh** script or each script individually (adapters must be trained beforehand):
-
-### Bare classifier
-``` 
-python pytorch_classifier.py \
-		--num_epochs 6 \
-		--data_path [PATH_TO_DATA]
-```
-### Adapter training
-
-A 1:
-``` 
-python make_adapter.py \
-		--num_epochs 15 \
-		--freeze_layers 'layer.1'
-		--data_path [PATH_TO_DATA]
-```
-A 1-3:
-``` 
-python make_adapter.py \
-		--num_epochs 15 \
-		--freeze_layers 'layer.1;layer.2;layer.3'
-		--data_path [PATH_TO_DATA]
-```
-### Classifiers with adapter
-with A 1:
-```
-python adapter_classifier.py \
-		--num_epochs 3 \
-		--data_path [PATH_TO_DATA] \
-		--adapter_name 'adapter_15-epochs_frozen-1'
-```
-with A 1-3:
-```
-python adapter_classifier.py \
-		--num_epochs 4 \
-		--data_path [PATH_TO_DATA] \
-		--adapter_name 'adapter_15-epochs_frozen-1-2-3'
-```
+I'll update README soon!
\ No newline at end of file
--- a/classifier_bare_huggingface.py
+++ b/classifier_bare_huggingface.py
+import torch
+from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
+import torch
+from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, set_seed
+from tqdm import tqdm
+import os
+from time import sleep
+from datetime import datetime
+import sys
+from sklearn.metrics import classification_report, accuracy_score
+from utils import open_file
+import pandas as pd
+import datasets
+from configure import parse_args
+from utils import *
+
+device = torch.device("cuda")
+
+# ---------------------------------------------------------------------------------------------------
+args = parse_args()
+now = datetime.now()
+dt_string = now.strftime("%d.%m.%y-%H:%M:%S")
+layers_to_freeze = args.freeze_layers.split(";")
+substitutions_file = 'mappings/substitutions.txt'
+# mapping_classes = args.mappings_file[:-4].split('-')[-1]
+set_seed(42)
+
+print('Model:', args.transformer_model, dt_string)
+print('Batch size:', args.batch_size * args.gradient_accumulation_steps)
+print('Num epochs:', args.num_epochs)
+
+# ===============
+# Dataset class
+# ===============
+
+# Open mappings
+mappings, inv_mappings = open_mappings(args.mappings_file)
+
+# Open sentences
+train_sentences, dev_dict_sentences, test_dict_sentences, framework_labels = open_sentences(args.data_path, mappings)
+
+# make pandas dataframes
+file_header = ['text', 'labels']
+
+train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences], 
+                        columns =file_header)
+train_df = train_df.sample(frac = 1) # shuffle the train
+
+# make a joint dev dataset in order to save models
+
+eval_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] 
+                        for corpus, sents in dev_dict_sentences.items()
+                        for x in sents], 
+                        columns =file_header)
+
+# dev_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] 
+#                                       for x in sents], 
+#                                      columns = file_header)
+#                for corpus, sents in dev_dict_sentences.items()}
+
+# test_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] 
+#                                       for x in sents], 
+#                                      columns = file_header)
+#                for corpus, sents in test_dict_sentences.items()}
+
+#Make datasets from dataframes
+train_dataset = datasets.Dataset.from_pandas(train_df)
+eval_dataset =  datasets.Dataset.from_pandas(eval_df)
+# dev_dict_dataset  = {corpus:datasets.Dataset.from_pandas(dev_df) 
+#                      for corpus, dev_df in dev_dict_df.items()}
+# test_dict_dataset = {corpus:datasets.Dataset.from_pandas(dev_df) 
+#                      for corpus, dev_df in test_dict_df.items()}
+
+# get number of labels
+num_labels = len(set([int(x.strip()) 
+                      for x in train_df['labels'].to_string(index=False).split('\n')])) +1
+
+# Encode the data
+train_dataset = train_dataset.map(encode_batch, batched=True)
+train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+
+eval_dataset = eval_dataset.map(encode_batch, batched=True)
+eval_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+
+# encoded_dev_dataset = {}
+# for corpus in dev_dict_dataset:
+#     temp = dev_dict_dataset[corpus].map(encode_batch, batched=True)
+#     temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+#     encoded_dev_dataset[corpus] = temp
+
+# encoded_test_dataset = {}
+# for corpus in test_dict_dataset:
+#     temp = test_dict_dataset[corpus].map(encode_batch, batched=True)
+#     temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+#     encoded_test_dataset[corpus] = temp
+
+# ---------------------------------------------------------------------------------------------------
+    
+# MODEL
+model = AutoModelForSequenceClassification.from_pretrained(args.transformer_model, 
+                                                           num_labels=num_labels)
+
+
+# Hyper-parameters for the Hugging Face Trainer. Checkpoints are saved and the
+# joint dev set is evaluated once per epoch, so that the best checkpoint can be
+# reloaded when training ends (load_best_model_at_end).
+training_args = TrainingArguments(
+    learning_rate    = 2e-5, #1e-4,
+    num_train_epochs = args.num_epochs,
+    per_device_train_batch_size = args.batch_size,
+    per_device_eval_batch_size  = args.batch_size,
+    gradient_accumulation_steps = args.gradient_accumulation_steps,
+    # Log roughly once per epoch. The step count has to be a whole number:
+    # true division produced a float, which the Trainer does not accept as a
+    # step count once it is larger than 1.
+    logging_steps  = max(1, len(train_sentences) // (args.batch_size * args.gradient_accumulation_steps)),
+    output_dir = "./results/models/" + args.transformer_model + '_' + dt_string,
+    overwrite_output_dir = True,
+    remove_unused_columns = False,
+    warmup_steps = 1000,  # number of warmup steps for learning rate
+    save_total_limit = args.num_epochs,
+    load_best_model_at_end = True,
+    weight_decay = 0.01,  # strength of weight decay
+    save_strategy='epoch',
+    evaluation_strategy='epoch'
+
+)
+
+trainer = Trainer(
+    model = model,
+    args  = training_args,
+    train_dataset = train_dataset, 
+    eval_dataset = eval_dataset
+)
+
+# Freeze layers in the classifier if desired
+if args.freeze_layers != '':
+    layers_to_freeze = args.freeze_layers.split(';')
+    for name, param in model.named_parameters():
+        if any(x in name for x in layers_to_freeze):
+            param.requires_grad = False
+
+
+# ===============================
+# Start the training 🚀
+# ===============================
+
+print('Start training...')
+trainer.train()
+
+
+#========================================================
+
+# ===============
+# Dataset class
+# ===============
+
+class Pytorch_Dataset(torch.utils.data.Dataset):
+    """Dataset that tokenizes every sentence once, when it is constructed.
+
+    Each element of `sentences` is read from the end: `sent[-2]` is handed to
+    the tokenizer (with `is_split_into_words=True`) and `sent[-1]` is kept as
+    the label.
+
+    NOTE(review): `tokenizer` and `np` are not defined or imported in the
+    visible part of this file; presumably they come in through
+    `from utils import *` -- confirm.
+    """
+
+    def __init__(self, sentences):
+        """Store the labels and the tokenized inputs of `sentences`."""
+
+        self.labels = [sent[-1] for sent in sentences]
+        # Every input is padded or truncated to exactly 512 positions and
+        # returned as PyTorch tensors.
+        self.texts = [tokenizer(sent[-2], 
+                                is_split_into_words=True,                              
+                                padding='max_length', 
+                                max_length = 512, 
+                                truncation=True,
+                                return_tensors="pt") 
+                                for sent in sentences]
+
+    def classes(self):
+        """Return the list of stored labels."""
+        return self.labels
+
+    def __len__(self):
+        """Return the number of examples (one label per example)."""
+        return len(self.labels)
+    
+    def get_batch_labels(self, idx):
+        """Return the label(s) selected by `idx`, wrapped in a NumPy array."""
+        # Fetch a batch of labels
+        return np.array(self.labels[idx])
+
+    def get_batch_texts(self, idx):
+        """Return the tokenizer output stored at position `idx`."""
+        # Fetch a batch of inputs
+        return self.texts[idx]
+
+    def __getitem__(self, idx):
+        """Return the pair (tokenized input, label) for `idx`."""
+
+        batch_texts = self.get_batch_texts(idx)
+        batch_y = self.get_batch_labels(idx)
+
+        return batch_texts, batch_y
+
+# ===============
+# Load datasets
+# ===============
+
+# Make one tokenized dataset per dev corpus and per test corpus
+dev_dataset   = {corpus: Pytorch_Dataset(s) for corpus, s in dev_dict_sentences.items()}
+test_dataset  = {corpus: Pytorch_Dataset(s) for corpus, s in test_dict_sentences.items()}
+
+# Wrap each dataset in a DataLoader. The batch size is read from the parsed
+# arguments: no module-level `batch_size` variable is defined in this script.
+dev_dict_dataloader = {corpus: DataLoader(dev_data, args.batch_size)
+                        for corpus, dev_data in dev_dataset.items()}
+test_dict_dataloader = {corpus: DataLoader(test_data, args.batch_size)
+                        for corpus, test_data in test_dataset.items()}
+
+# Dev results
+
+print('\nDev results:')
+for corpus in dev_dict_dataloader:
+    dev_labels, dev_results = get_better_predictions(model, 
+                                   corpus, 
+                                   dev_dict_dataloader[corpus], 
+                                   framework_labels[corpus.split('.')[1]]
+                                  )
+    
+    
+    old_results = get_predictions(model, 
+                                corpus, 
+                                dev_dict_dataloader[corpus])
+    
+    print('new:', print_better_accuracies(dev_labels, dev_results))
+    
+    
+#     path_results = 'results/dev/' + args.transformer_model + '_' + str(args.num_epochs)
+#     if not os.path.exists(path_results):
+#         os.makedirs(path_results)
+                
+#     print_results_to_file(corpus, 
+#                           dev_dict_sentences[corpus], 
+#                           dev_results,
+#                           inv_mappings, 
+#                           #substitutions_file, 
+#                           path_results)
+
+# Test results
+
+# Encode every test corpus for the Trainer-based prediction helper. The
+# module-level code that used to build `encoded_test_dataset` is commented out
+# earlier in this script, so the loop below would otherwise stop with a
+# NameError.
+encoded_test_dataset = {}
+for corpus, sents in test_dict_sentences.items():
+    test_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in sents],
+                           columns=file_header)
+    temp = datasets.Dataset.from_pandas(test_df).map(encode_batch, batched=True)
+    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
+    encoded_test_dataset[corpus] = temp
+
+print('\ntest results:')
+for corpus, encoded_corpus in encoded_test_dataset.items():
+    print()
+    test_results = get_predictions_huggingface(trainer,
+                                               corpus,
+                                               encoded_corpus)
+    
+    
+#     path_results = 'results/test/' + args.transformer_model + '_' + str(args.num_epochs)
+#     if not os.path.exists(path_results):
+#         os.makedirs(path_results)
+                
+#     print_results_to_file(corpus, 
+#                           test_dict_sentences[corpus], 
+#                           test_results,
+#                           inv_mappings, 
+#                           #substitutions_file, 
+#                           path_results)
--- a/pytorch_classifier.py
+++ b/pytorch_classifier.py
@@ -23,9 +23,12 @@ now = datetime.now()
 dt_string = now.strftime("%d.%m.%y-%H:%M:%S")
 layers_to_freeze = args.freeze_layers.split(";")
 substitutions_file = 'mappings/substitutions.txt'
-mapping_classes = args.mappings_file[:-4].split('-')[-1]
+# mapping_classes = args.mappings_file[:-4].split('-')[-1]
 # specific_results = open_specific_results('mappings/specific_results.txt')['B']
+
+
 set_seed(42)
+torch.manual_seed(42)

 # ===============
 # Dataset class
@@ -74,7 +77,7 @@ mappings, inv_mappings = open_mappings(args.mappings_file)
 batch_size = args.batch_size
 tokenizer  = AutoTokenizer.from_pretrained(args.transformer_model)

-train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings)
+train_sentences, dev_dict_sentences, test_dict_sentences, framework_labels = open_sentences(args.data_path, mappings)

 # Determine linear size (= number of classes in the sets + 1)
 num_labels = len(set(sent[-1] for sent in train_sentences)) + 1
@@ -212,35 +215,47 @@ def train(model,
            dev_results = get_predictions(model, 
                                corpus, 
                                dev_dict_dataloader[corpus])
+            better_dev_results = get_better_predictions(model, 
+                                corpus, 
+                                dev_dict_dataloader[corpus], 
+                                                 framework_labels)
            
-            path_results = 'results/dev/pytorch_' + mapping_classes + '_' + str(epoch_num+1)
-            if not os.path.exists(path_results):
-                os.makedirs(path_results)
+#             path_results = 'results/dev/language_' + mapping_classes + '_' + str(epoch_num+1)
+#             if not os.path.exists(path_results):
+#                 os.makedirs(path_results)
                
-            print_results_to_file(corpus, 
-                                dev_dict_sentences[corpus], 
-                                dev_results,
-                                inv_mappings, #substitutions_file, 
-                                path_results)
+#             print_results_to_file(corpus, 
+#                                 dev_dict_sentences[corpus], 
+#                                 dev_results,
+#                                 inv_mappings, #substitutions_file, 
+#                                 path_results)
            
        # ------ Test --------
        
        print('\nTest results for epoch:', epoch_num + 1)
        
        for corpus in test_dict_dataloader:
-            test_results = get_predictions(model, 
+            test_results = get_predictions(
+                                model, 
                                corpus, 
-                                test_dict_dataloader[corpus])
+                                test_dict_dataloader[corpus]
+                                )
+            better_test_results = get_better_predictions(
+                                    model, 
+                                    corpus, 
+                                    test_dict_dataloader[corpus], 
+                                    framework_labels
+                                    )
            
-            path_results = 'results/test/pytorch_' + mapping_classes + '_' + str(epoch_num+1)
-            if not os.path.exists(path_results):
-                os.makedirs(path_results)
+#             path_results = 'results/test/language_' + mapping_classes + '_' + str(epoch_num+1)
+#             if not os.path.exists(path_results):
+#                 os.makedirs(path_results)
                
-            print_results_to_file(corpus, 
-                                test_dict_sentences[corpus], 
-                                test_results,
-                                inv_mappings, #substitutions_file, 
-                                path_results)
+#             print_results_to_file(corpus, 
+#                                 test_dict_sentences[corpus], 
+#                                 test_results,
+#                                 inv_mappings, #substitutions_file, 
+#                                 path_results)
            
            
 #         # we want the results of specific epochs for specific corpora. 
@@ -279,6 +294,24 @@ print('\nTraining Done!')
 # ------- Testing ---------

 # print('Testing...')
+
+
+# for corpus in dev_dict_dataloader:
+#     dev_labels, dev_results = get_better_predictions(model, 
+#                                    corpus, 
+#                                    dev_dict_dataloader[corpus], 
+#                                    framework_labels[corpus.split('.')[1]]
+#                                   )
+    
+    
+#     old_results = get_predictions(model, 
+#                                 corpus, 
+#                                 dev_dict_dataloader[corpus])
+    
+    
+#     print_results_to_file(corpus, 
+
+
 # for corpus in test_dict_dataloader:
 #     test_results = get_predictions(model, 
 #                                    corpus, 

--- a/adapter_classifier.py
+++ b/adapter_classifier.py
@@ -38,6 +38,8 @@ tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)

 set_seed(42)

+            
+
 print('Train classifier with adapter\n')
 print('Adapter name:', adapter_name)
 print('Model:', args.transformer_model)
@@ -111,9 +113,16 @@ training_args = TrainingArguments(
    per_device_eval_batch_size  = args.batch_size,
    gradient_accumulation_steps = args.gradient_accumulation_steps,
    logging_steps  = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)),
-    output_dir = "./training_output",
-    overwrite_output_dir =True,
-    remove_unused_columns=False,
+    output_dir = "./results/models/run_" + args.transformer_model,
+    overwrite_output_dir = True,
+    remove_unused_columns = False,
+    warmup_steps = 1000,  # number of warmup steps for learning rate  
+#     save_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)) / 1368,
+    save_total_limit = args.num_epochs,
+    load_best_model_at_end = True,
+    weight_decay = 0.01,  # strength of weight decay
+    save_strategy='epoch', 
+    evaluation_strategy='epoch'
 )


@@ -123,6 +132,7 @@ trainer = Trainer(
    train_dataset = train_dataset
 )

+
 # Freeze layers in the classifier if desired
 if args.freeze_layers != '':
    layers_to_freeze = args.freeze_layers.split(';')
@@ -155,7 +165,7 @@ for corpus in encoded_dev_dataset:
                          dev_dict_sentences[corpus], 
                          dev_results,
                          inv_mappings, 
-                          substitutions_file, 
+                          #substitutions_file, 
                          path_results)

 # Test results
@@ -176,7 +186,7 @@ for corpus in encoded_test_dataset:
                          test_dict_sentences[corpus], 
                          test_results,
                          inv_mappings, 
-                          substitutions_file, 
+                          #substitutions_file, 
                          path_results)



--- a/configure.py
+++ b/configure.py
@@ -16,6 +16,9 @@ def parse_args():
    # transformer model
    parser.add_argument("--transformer_model", default="bert-base-multilingual-cased", type=str, 
                        help="Model used, default: bert-multilingual-base-cased")
+    # tokenizer (you need to load the model tokenizer for checkpoint models)
+    parser.add_argument("--tokenizer_model", default="bert-base-multilingual-cased", type=str, 
+                        help="Tokenizer, default: bert-multilingual-base-cased")

    # Number of training epochs
    parser.add_argument("--num_epochs", default=4, type=int, 
@@ -48,6 +51,11 @@ def parse_args():
    # normalize direction
    parser.add_argument("--normalize_direction", default='yes', type=str, 
                        help="Change order of sentences when the direction of relations is 1<2 to 2>1.") 
+    
+    # only specific languages/corpora
+    parser.add_argument("--langs_to_use", default='', type=str, 
+                        help="List of languages/corpora to use, a str separated by ;")   
+    
            
    args = parser.parse_args()


--- a/get_mappings_all_sets.py
+++ b/get_mappings_all_sets.py
+#!/usr/bin/env python
+# coding: utf-8
+
+import os
+from configure import parse_args
+from utils import *
+
+args = parse_args()
+
+corpora = args.langs_to_use.split(';')
+
+files = ['data/'+folder+'/'+f for folder in os.listdir('data') 
+         for f in os.listdir('data/' + folder) 
+         if f.endswith('.rels')
+         if any(x in f for x in corpora)]
+
+# open substitutions per file
+# Builds a nested dict subs[column 1][column 0] = column 2 from a
+# tab-separated file whose first line is skipped.
+# NOTE(review): `substitutions_file` and `revert_substitutions` are not defined
+# in this script; presumably they come from `from utils import *` -- confirm.
+subs = {}
+with open(substitutions_file, 'r', encoding='utf-8') as f:
+    next(f)
+    for line in f:
+        l = line.strip().split('\t')
+        # NOTE(review): the membership test looks at `revert_substitutions`,
+        # not at `subs`. A key absent from it gets a fresh dict on every line
+        # (dropping earlier entries); a key present in it but missing from
+        # `subs` raises KeyError below. Possibly `subs` was intended -- confirm.
+        if not l[1] in revert_substitutions:
+            subs[l[1]] = {}
+        subs[l[1]][l[0]] = l[2]
+        
+
+def read_file(file):
+    ''' Open the .rels file.
+
+        Argument:
+        - file: path to a tab-separated .rels file; its first line is skipped.
+        Returns a list with one entry per kept line:
+        [words of column 3 + ['[SEP]'] + words of column 4,
+         column 9, lower-cased label from column 11].
+        A label whose lower-cased form is a key of `replacements` is replaced
+        by the mapped value first.
+        NOTE(review): `replacements` is not defined in this script; presumably
+        it comes from `from utils import *` -- confirm.
+    '''
+    lines = []
+    
+    with open(file, 'r', encoding='utf-8') as f:
+        next(f)
+        for line in f:
+            if not "__" in line: # skip lines that contain a double underscore
+                l = line.strip().split('\t')   
+                if l[11].lower() in replacements:
+                    l[11] = replacements[l[11].lower()]
+                lines.append([l[3].split(' ') + ['[SEP]'] + l[4].split(' '), 
+                              l[9], l[11].lower()])  
+        return lines
+
+
+train_files = [f for f in files if 'train' in f]
+dev_files = [f for f in files if 'dev' in f]
+test_files = [f for f in files if 'test' in f]
+
+
+train_sentences = []
+for f in train_files:
+    temp = read_file(f)
+    if temp != []:
+        train_sentences += temp
+
+dev_sentences = []
+for f in dev_files:
+    temp = read_file(f)
+    if temp != []:
+        dev_sentences += temp
+    
+# Read the test split. This must iterate over the test files: looping over
+# `dev_files` here only re-read the dev data, so labels that occur solely in
+# a test file never reached the generated mappings.
+test_sentences = []
+for f in test_files:
+    temp = read_file(f)
+    if temp != []:
+        test_sentences += temp
+
+
+
+# Collect every label seen in any split. Sorting makes the label -> id
+# assignment reproducible: the iteration order of a set of strings can differ
+# from one interpreter run to the next, which would silently renumber classes.
+list_labels = sorted({x[-1]
+                      for sents in (train_sentences, dev_sentences, test_sentences)
+                      for x in sents})
+
+dict_labels = dict(enumerate(list_labels))
+inv_labels = {v: str(k) for k, v in dict_labels.items()}
+
+# add the replacements as well!
+for r in replacements:
+    inv_labels[r] = inv_labels[replacements[r]]
+
+# NOTE(review): no header row is written here, while the `open_mappings`
+# helper in make-mappings.ipynb skips the first line of a mappings file --
+# confirm which of the two is intended.
+with open('mappings.tsv', 'w', encoding='utf-8') as f:
+    for k, v in inv_labels.items():
+        f.write(k + '\t' + str(v) + '\n')
\ No newline at end of file
--- a/get_predictions.sh
+++ b/get_predictions.sh
+#!/usr/bin/env bash
+
+#SBATCH --job-name=model-LC
+
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --partition=GPUNodes
+#SBATCH --gres=gpu:1
+
+
+# tests tests
+
+
+# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 xml-roberta-classifier.py \
+#     --batch_size 4 \
+#     --gradient_accumulation_steps 32 \
+#     --num_epochs 6 \
+#     --data_path '/users/melodi/emetheni/clean_data' \
+#     --mappings_file 'mappings/mappings_substitutions.tsv' \
+#     --transformer_model "xlm-roberta-base"
+   
+    
+srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 open_finetuned_model.py \
+    --data_path '/users/melodi/emetheni/clean_data' \
+    --mappings_file 'mappings/mappings_substitutions.tsv' \
+    --transformer_model 'results/models/run_xlm-roberta-base/checkpoint-13500'
--- a/huggingface_classifier.py
+++ b/huggingface_classifier.py
-#!/usr/bin/env python
-# coding: utf-8
-
-import torch
-import numpy as np
-from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, AutoConfig, TrainingArguments, Trainer, EvalPrediction, set_seed
-from torch import nn
-from torch.optim import AdamW
-from torch.utils.data import DataLoader
-import torch.nn.functional as F
-from torch.autograd import Variable
-from tqdm import tqdm
-import os
-from time import sleep
-from datetime import datetime
-import sys
-from sklearn.metrics import classification_report, accuracy_score
-from utils import open_file
-import pandas as pd
-import datasets
-from configure import parse_args
-from utils import *
-
-args = parse_args()
-now = datetime.now()
-dt_string = now.strftime("%d.%m.%y-%H:%M:%S")
-save_name = args.mappings_file.split('-')[-1]
-mappings, inv_mappings = open_mappings(args.mappings_file)
-substitutions_file = 'mappings/substitutions.txt'
-tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)
-
-
-set_seed(42)
-
-print('Model:', args.transformer_model)
-print('Batch size:', args.batch_size * args.gradient_accumulation_steps)
-print('Num epochs:', args.num_epochs)
-
-# Open mappings
-mappings, inv_mappings = open_mappings(args.mappings_file)
-
-# Open sentences
-train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings)
-
-# make pandas dataframes
-file_header = ['text', 'labels']
-
-train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences], 
-                        columns =file_header)
-train_df = train_df.sample(frac = 1) # shuffle the train
-
-dev_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] 
-                                      for x in sents], 
-                                     columns = file_header)
-               for corpus, sents in dev_dict_sentences.items()}
-
-test_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] 
-                                      for x in sents], 
-                                     columns = file_header)
-               for corpus, sents in test_dict_sentences.items()}
-
-#Make datasets from dataframes
-train_dataset = datasets.Dataset.from_pandas(train_df)
-dev_dict_dataset  = {corpus:datasets.Dataset.from_pandas(dev_df) 
-                     for corpus, dev_df in dev_dict_df.items()}
-test_dict_dataset = {corpus:datasets.Dataset.from_pandas(dev_df) 
-                     for corpus, dev_df in test_dict_df.items()}
-
-# get number of labels
-num_labels = len(set([int(x.strip()) 
-                      for x in train_df['labels'].to_string(index=False).split('\n')])) +1
-
-# Encode the data
-train_dataset = train_dataset.map(encode_batch, batched=True)
-train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
-
-encoded_dev_dataset = {}
-for corpus in dev_dict_dataset:
-    temp = dev_dict_dataset[corpus].map(encode_batch, batched=True)
-    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
-    encoded_dev_dataset[corpus] = temp
-
-encoded_test_dataset = {}
-for corpus in test_dict_dataset:
-    temp = test_dict_dataset[corpus].map(encode_batch, batched=True)
-    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
-    encoded_test_dataset[corpus] = temp
-
-# ===============================
-# Training params
-# ===============================
-
-model = AutoModelForSequenceClassification.from_pretrained(args.transformer_model)
-
-
-training_args = TrainingArguments(
-    learning_rate    = 2e-5, #1e-4,
-    num_train_epochs = args.num_epochs,
-    per_device_train_batch_size = args.batch_size,
-    per_device_eval_batch_size  = args.batch_size,
-    gradient_accumulation_steps = args.gradient_accumulation_steps,
-    logging_steps  = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)),
-    output_dir = "./training_output",
-    overwrite_output_dir =True,
-    remove_unused_columns=False,
-)
-
-
-trainer = Trainer(
-    model = model,
-    args  = training_args,
-    train_dataset = train_dataset
-)
-
-# Freeze layers in the classifier if desired
-if args.freeze_layers != '':
-    layers_to_freeze = args.freeze_layers.split(';')
-    for name, param in model.named_parameters():
-        if any(x in name for x in layers_to_freeze):
-            param.requires_grad = False
-
-
-# ===============================
-# Start the training 🚀
-# ===============================
-
-print('Start training...')
-trainer.train()
-
-# Dev results
-
-print('\nDev results:')
-for corpus in encoded_dev_dataset:
-    print()
-    dev_results = get_predictions_huggingface(trainer, corpus, 
-                                    encoded_dev_dataset[corpus])
-    
-    
-    path_results = 'results/dev/' + save_name + '_' + str(args.num_epochs)
-    if not os.path.exists(path_results):
-        os.makedirs(path_results)
-                
-    print_results_to_file(corpus, 
-                          dev_dict_sentences[corpus], 
-                          dev_results,
-                          inv_mappings, 
-                          #substitutions_file, 
-                          path_results)
-
-# Test results
-
-print('\ntest results:')
-for corpus in encoded_test_dataset:
-    print()
-    test_results = get_predictions_huggingface(trainer, 
-                                               corpus, 
-                                               encoded_test_dataset[corpus])
-    
-    
-    path_results = 'results/test/' + save_name + '_' + str(args.num_epochs)
-    if not os.path.exists(path_results):
-        os.makedirs(path_results)
-                
-    print_results_to_file(corpus, 
-                          test_dict_sentences[corpus], 
-                          test_results,
-                          inv_mappings, 
-                          substitutions_file, 
-                          path_results)
-
-
-
-#         for corpus in test_dict_dataloader:
-#             test_results = get_predictions(model, 
-#                                 corpus, 
-#                                 test_dict_dataloader[corpus])
-            
-#             path_results = 'results/test/pytorch' + str(epoch_num+1)
-#             if not os.path.exists(path_results):
-#                 os.makedirs(path_results)
-                
-#             print_results_to_file(corpus, 
-#                                 test_dict_sentences[corpus], 
-#                                 test_results,
-#                                 inv_mappings, substitutions_file, 
-#                                 path_results)    
-    
-    
-    
-    
-    
-    
-
-# Save specific test results
-
-# print('\nTest results:')
-# for corpus in encoded_test_dataset:
-#     print()
-#     test_results = get_predictions_huggingface(trainer, corpus, 
-#                                     encoded_test_dataset[corpus])
-# 
-#     print_results_to_file(corpus, test_dict_sentences[corpus], test_results, 
-#                           inv_mappings, substitutions_file)
\ No newline at end of file
--- a/make-mappings.ipynb
+++ b/make-mappings.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, io\n",
+    "import torch\n",
+    "from transformers import AutoConfig, AutoTokenizer\n",
+    "import numpy as np\n",
+    "from sklearn.metrics import accuracy_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filename = 'eng.explicit.train.rels'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def open_mappings(mappings_file):\n",
+    "    \n",
+    "    ''' Open the mappings file into a dictionary.'''\n",
+    "    \n",
+    "    mappings = {}\n",
+    "    with open(mappings_file, 'r') as f:\n",
+    "        next(f)\n",
+    "        for l in f:\n",
+    "            mappings[l.split('\\t')[0]] = int(l.strip().split('\\t')[-1])\n",
+    "    inv_mappings = {v:k for k, v in mappings.items()}\n",
+    "\n",
+    "    return mappings, inv_mappings\n",
+    "\n",
+    "\n",
+    "def open_file(filename, mappings_dict, normalize_direction='yes'):   \n",
+    "    \n",
+    "    ''' Function to open a .rels file. \n",
+    "        Arguments: \n",
+    "        - filename: the path to a .rels file \n",
+    "        - mappings_dict: a dictionary of mappings of unique labels to integers\n",
+    "        - normalize_direction: if 'yes', the two sentences are swapped\n",
+    "          whenever the direction column is not '1>2'\n",
+    "        Returns a list of lists, where each list is:\n",
+    "        the line + [two sentences combined with special BERT token, encoded label]\n",
+    "    '''\n",
+    "    \n",
+    "    max_len = 254 # 512 (max bert len) / 2 (2 sents) -2 (special tokens)\n",
+    "    lines = []\n",
+    "    SEP_token = '[SEP]'\n",
+    "\n",
+    "    with open(filename, 'r', encoding='utf-8') as f:\n",
+    "        next(f)\n",
+    "        for line in f:\n",
+    "            l = line.strip().split('\\t')\n",
+    "            \n",
+    "            if len(l) > 1:\n",
+    "                # chop the sentences to max_len if too long\n",
+    "                sent_1 = l[3].split(' ')\n",
+    "                sent_2 = l[4].split(' ')      \n",
+    "                \n",
+    "                if len(sent_1) > max_len:\n",
+    "                    sent_1 = sent_1[:max_len]\n",
+    "                if len(sent_2) > max_len:\n",
+    "                    sent_2 = sent_2[:max_len]\n",
+    "                \n",
+    "                # flip them if different direction\n",
+    "                if normalize_direction == 'yes':\n",
+    "                    if l[9] == '1>2':\n",
+    "                        lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]])\n",
+    "                    else:\n",
+    "                        lines.append(l + [sent_2 + [SEP_token] + sent_1, mappings_dict[l[11].lower()]])\n",
+    "                else:\n",
+    "                    # use the mappings passed in, not the notebook-level global\n",
+    "                    lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]])\n",
+    "\n",
+    "    return lines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mappings, inv_mappings = open_mappings('mappings/mappings_substitutions.tsv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['expansion.conjunction', 'expansion.level-of-detail', 'expansion.instantiation', 'contingency.cause', 'expansion.substitution', 'comparison.contrast', 'temporal.asynchronous', 'comparison.similarity', 'contingency.condition', 'contingency.purpose', 'expansion.manner', 'temporal.synchronous', 'contingency.negative-condition', 'expansion.exception', 'comparison.concession', 'contingency.condition+speechact', 'contingency.cause+speechact', 'expansion.disjunction', 'contingency.cause+belief']\n"
+     ]
+    }
+   ],
+   "source": [
+    "lines =  open_file('imp-exp/' + filename, mappings)\n",
+    "present_labels = list(set([x[11].lower() for x in lines]))\n",
+    "print(present_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subs = {\"alternation\": \"expansion.alternative\", \n",
+    "        \"alternative\": \"expansion.alternative\", \n",
+    "        \"bg-general\": \"background\", \n",
+    "        \"causation\": \"cause\", \"clarification_question\": \"q_elab\",\n",
+    "        \"conditional\": \"condition\", \"conjunction\": \"expansion.conjunction\",\n",
+    "        \"correction\": \"expansion.correction\", \"disjunction\": \"expansion.disjunction\", \n",
+    "        \"evidence\": \"explanation-evidence\", \"exp-evidence\": \"explanation-evidence\", \n",
+    "        \"goal\": \"purpose-goal\", \"joint-disjunction\": \"expansion.disjunction\",\n",
+    "        \"justify\": \"explanation-justify\", \"list\": \"joint-list\", \n",
+    "        \"mode-manner\": \"manner-means\", \"motivation\": \"explanation-motivation\", \n",
+    "        \"motivation\": \"explanation-motivation\", \"otherwise\": \"adversative\", \n",
+    "        \"repetition\": \"restatement-repetition\", \"restatement\": \"expansion.restatement\", \n",
+    "        \"sequence\": \"joint-sequence\", \"temporal.synchrony\": \"temporal.synchronous\", \n",
+    "        \"textual-organization\": \"organization\", \"unconditional\": \"expansion.disjunction\", \n",
+    "        \"unless\": \"contrast\", \n",
+    "        \"expansion.genexpansion\": \"expansion\", \n",
+    "        \"cause-result\" : 'cause-effect', \n",
+    "        \"qap\": \"question_answer_pair\", \n",
+    "        \"contingency.negative-condition+speechact\": 'contingency', \n",
+    "        \"contingency.negative\" : 'contingency', \n",
+    "        \"expansion.genexpansion\" : 'expansion', \n",
+    "        \"expansion.level\" : 'expansion', \n",
+    "        \"qap.hypophora\": 'hypophora', \n",
+    "        \"findings\" : 'result'\n",
+    "       \n",
+    "       }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['expansion.conjunction', 'expansion.level-of-detail', 'expansion.instantiation', 'contingency.cause', 'expansion.substitution', 'comparison.contrast', 'temporal.asynchronous', 'comparison.similarity', 'contingency.condition', 'contingency.purpose']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# correct order\n",
+    "for l in mappings:\n",
+    "    if l not in present_labels:\n",
+    "        present_labels.append(l)\n",
+    "print(present_labels[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make substitutions\n",
+    "\n",
+    "temp = []\n",
+    "for l in present_labels:\n",
+    "    if l in subs:\n",
+    "        temp.append(subs[l])\n",
+    "    else:\n",
+    "        temp.append(l)\n",
+    "\n",
+    "temp2 = []\n",
+    "for l in temp:\n",
+    "    if not l in temp2:\n",
+    "        temp2.append(l)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_mappings = dict(enumerate(temp2))\n",
+    "new_mappings = {v:k for k, v in new_mappings.items()}\n",
+    "\n",
+    "for l in present_labels:\n",
+    "    if l not in new_mappings:\n",
+    "        new_mappings[l] = new_mappings[subs[l]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert len(new_mappings) == len(mappings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('imp-exp/mappings_' + filename[:-5].replace('.', '-') + '.tsv', 'w') as f:\n",
+    "    for k, v in new_mappings.items():\n",
+    "        f.write(k + '\\t' + str(v) + '\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
+%% Cell type:code id: tags:
+
+``` python
+import os, io
+import torch
+from transformers import AutoConfig, AutoTokenizer
+import numpy as np
+from sklearn.metrics import accuracy_score
+```
+
+%% Cell type:code id: tags:
+
+``` python
+filename = 'eng.explicit.train.rels'
+```
+
+%% Cell type:code id: tags:
+
+``` python
+def open_mappings(mappings_file):
+
+    ''' Open the mappings file into a dictionary.'''
+
+    mappings = {}
+    with open(mappings_file, 'r') as f:
+        next(f)
+        for l in f:
+            mappings[l.split('\t')[0]] = int(l.strip().split('\t')[-1])
+    inv_mappings = {v:k for k, v in mappings.items()}
+
+    return mappings, inv_mappings
+
+
+def open_file(filename, mappings_dict, normalize_direction='yes'):
+
+    ''' Function to open a .rels file.
+        Arguments:
+        - filename: the path to a .rels file
+        - mappings_dict: a dictionary of mappings of unique labels to integers
+        Returns a list of lists, where each list is:
+        the line + [two sentences combined with special BERT token, encoded label]
+    '''
+
+    max_len = 254 # 512 (max bert len) / 2 (2 sents) -2 (special tokens)
+    lines = []
+    SEP_token = '[SEP]'
+
+    with open(filename, 'r', encoding='utf-8') as f:
+        next(f)
+        for line in f:
+            l = line.strip().split('\t')
+
+            if len(l) > 1:
+                # chop the sentences to max_len if too long
+                sent_1 = l[3].split(' ')
+                sent_2 = l[4].split(' ')
+
+                if len(sent_1) > max_len:
+                    sent_1 = sent_1[:max_len]
+                if len(sent_2) > max_len:
+                    sent_2 = sent_2[:max_len]
+
+                # flip them if different direction
+                if normalize_direction == 'yes':
+                    if l[9] == '1>2':
+                        lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]])
+                    else:
+                        lines.append(l + [sent_2 + [SEP_token] + sent_1, mappings_dict[l[11].lower()]])
+                else:
+                    lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings[l[11].lower()]])
+
+    return lines
+```
+
+%% Cell type:code id: tags:
+
+``` python
+mappings, inv_mappings = open_mappings('mappings/mappings_substitutions.tsv')
+```
+
+%% Cell type:code id: tags:
+
+``` python
+lines =  open_file('imp-exp/' + filename, mappings)
+present_labels = list(set([x[11].lower() for x in lines]))  # unique lowercased labels (column 11); the order coming out of the set is arbitrary
+print(present_labels)
+```
+
+%% Output
+
+    ['expansion.conjunction', 'expansion.level-of-detail', 'expansion.instantiation', 'contingency.cause', 'expansion.substitution', 'comparison.contrast', 'temporal.asynchronous', 'comparison.similarity', 'contingency.condition', 'contingency.purpose', 'expansion.manner', 'temporal.synchronous', 'contingency.negative-condition', 'expansion.exception', 'comparison.concession', 'contingency.condition+speechact', 'contingency.cause+speechact', 'expansion.disjunction', 'contingency.cause+belief']
+
+%% Cell type:code id: tags:
+
+``` python
+subs = {"alternation": "expansion.alternative",
+        "alternative": "expansion.alternative",
+        "bg-general": "background",
+        "causation": "cause", "clarification_question": "q_elab",
+        "conditional": "condition", "conjunction": "expansion.conjunction",
+        "correction": "expansion.correction", "disjunction": "expansion.disjunction",
+        "evidence": "explanation-evidence", "exp-evidence": "explanation-evidence",
+        "goal": "purpose-goal", "joint-disjunction": "expansion.disjunction",
+        "justify": "explanation-justify", "list": "joint-list",
+        "mode-manner": "manner-means", "motivation": "explanation-motivation",
+        "motivation": "explanation-motivation", "otherwise": "adversative",
+        "repetition": "restatement-repetition", "restatement": "expansion.restatement",
+        "sequence": "joint-sequence", "temporal.synchrony": "temporal.synchronous",
+        "textual-organization": "organization", "unconditional": "expansion.disjunction",
+        "unless": "contrast",
+        "expansion.genexpansion": "expansion",
+        "cause-result" : 'cause-effect',
+        "qap": "question_answer_pair",
+        "contingency.negative-condition+speechact": 'contingency',
+        "contingency.negative" : 'contingency',
+        "expansion.genexpansion" : 'expansion',
+        "expansion.level" : 'expansion',
+        "qap.hypophora": 'hypophora',
+        "findings" : 'result'
+
+       }
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# correct order
+for l in mappings:
+    if l not in present_labels:
+        present_labels.append(l)
+print(present_labels[:10])
+```
+
+%% Output
+
+    ['expansion.conjunction', 'expansion.level-of-detail', 'expansion.instantiation', 'contingency.cause', 'expansion.substitution', 'comparison.contrast', 'temporal.asynchronous', 'comparison.similarity', 'contingency.condition', 'contingency.purpose']
+
+%% Cell type:code id: tags:
+
+``` python
+# make substitutions
+
+temp = []
+for l in present_labels:
+    if l in subs:
+        temp.append(subs[l])
+    else:
+        temp.append(l)
+
+temp2 = []
+for l in temp:
+    if not l in temp2:
+        temp2.append(l)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+new_mappings = dict(enumerate(temp2))
+new_mappings = {v:k for k, v in new_mappings.items()}
+
+for l in present_labels:
+    if l not in new_mappings:
+        new_mappings[l] = new_mappings[subs[l]]
+```
+
+%% Cell type:code id: tags:
+
+``` python
+assert len(new_mappings) == len(mappings)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+with open('imp-exp/mappings_' + filename[:-5].replace('.', '-') + '.tsv', 'w') as f:  # filename[:-5] drops the last 5 characters ('.rels' for the filename set above)
+    for k, v in new_mappings.items():
+        f.write(k + '\t' + str(v) + '\n')  # NOTE(review): no header row is written, but open_mappings skips the first line when reading - confirm before loading this file with it
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/make_adapter.py
+++ b/make_adapter.py
@@ -18,10 +18,9 @@ tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)
 layers_to_freeze = args.freeze_layers.split(';')
 set_seed(42)
 batch_size = args.batch_size
-mapping_classes = args.mappings_file[:-4].split('-')[-1]

 # Set name for adapter
-adapter_name = 'A_' + str(args.num_epochs) + '-F_' + args.freeze_layers.replace('layer.', '-').replace(';', '') + '-M_' + mapping_classes
+adapter_name = args.adapter_name 

 print('Create classifier adapter\n')
 print('Name:', adapter_name)
@@ -33,7 +32,12 @@ print('Frozen layers:',  args.freeze_layers.replace(';', ', '))
 mappings, inv_mappings = open_mappings(args.mappings_file)

 # Open sentences
-train_sentences, dev_dict_sentences, _ = open_sentences(args.data_path, mappings)
+train_sentences, dev_dict_sentences, _ = open_sentences_with_lang(args.data_path, mappings)
+
+
+print('\nCheck encodings:\n')
+print(train_sentences[0])
+

 # make pandas dataframes
 file_header = ['text', 'labels']
@@ -91,15 +95,24 @@ training_args = TrainingArguments(
    per_device_eval_batch_size  = args.batch_size,
    gradient_accumulation_steps = args.gradient_accumulation_steps,
    logging_steps  = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)),
-    output_dir = "./training_output",
-    overwrite_output_dir =True,
-    remove_unused_columns=False,
+    output_dir = "./results/models/adapter_" + args.transformer_model,
+    overwrite_output_dir = True,
+    remove_unused_columns = False,
+    warmup_steps = 1000,  # number of warmup steps for learning rate  
+#     save_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)) / 1368,
+    save_total_limit = args.num_epochs,
+    load_best_model_at_end = True,
+    weight_decay = 0.01,  # strength of weight decay
+    save_strategy='epoch', 
+    evaluation_strategy='epoch'
+    
 )

 trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
+    eval_dataset = dev_dataset
 )

 # freeze layers

--- a/mappings/mappings_substitutions.tsv
+++ b/mappings/mappings_substitutions.tsv
+LABEL	MAP
 mode-means	0
 expansion.restatement	1
 expansion.substitution	2

--- a/open_finetuned_model.py
+++ b/open_finetuned_model.py
+#!/usr/bin/env python
+# coding: utf-8
+
+import torch
+import numpy as np
+from transformers import AutoModel, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
+from torch import nn
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+import torch.nn.functional as F
+from torch.autograd import Variable
+from tqdm import tqdm
+import os
+from time import sleep
+from datetime import datetime
+import sys
+from sklearn.metrics import classification_report, accuracy_score
+from configure import parse_args
+from utils import *
+
+args = parse_args()
+now = datetime.now()
+dt_string = now.strftime("%d.%m.%y-%H:%M:%S")  # day.month.year-hour:minute:second
+layers_to_freeze = args.freeze_layers.split(";")  # ';'-separated layer names
+substitutions_file = 'mappings/substitutions.txt'
+mapping_classes = args.mappings_file[:-4].split('-')[-1]  # text after the last '-' once the final 4 characters (e.g. '.tsv') are dropped
+# specific_results = open_specific_results('mappings/specific_results.txt')['B']
+set_seed(42)  # fixed seed
+
+# ===============
+# Dataset class
+# ===============
+
+class Dataset(torch.utils.data.Dataset):
+
+    def __init__(self, sentences):
+
+        self.labels = [sent[-1] for sent in sentences]
+        self.texts = [tokenizer(sent[-2], 
+                                is_split_into_words=True,                              
+                                padding='max_length', 
+                                max_length = 512, 
+                                truncation=True,
+                                return_tensors="pt") 
+                                for sent in sentences]
+
+    def classes(self):
+        return self.labels
+
+    def __len__(self):
+        return len(self.labels)
+    
+    def get_batch_labels(self, idx):
+        # Fetch a batch of labels
+        return np.array(self.labels[idx])
+
+    def get_batch_texts(self, idx):
+        # Fetch a batch of inputs
+        return self.texts[idx]
+
+    def __getitem__(self, idx):
+
+        batch_texts = self.get_batch_texts(idx)
+        batch_y = self.get_batch_labels(idx)
+
+        return batch_texts, batch_y
+
+# ===============
+# Load datasets
+# ===============
+
+# Open mappings
+mappings, inv_mappings = open_mappings(args.mappings_file)
+batch_size = args.batch_size
+tokenizer  = AutoTokenizer.from_pretrained('xlm-roberta-base')  # NOTE(review): hard-coded checkpoint, while args.transformer_model is what gets printed below - confirm they agree
+
+train_sentences, dev_dict_sentences, test_dict_sentences, framework_labels = open_sentences(args.data_path, mappings)
+
+# Determine linear size (= number of classes in the sets + 1)
+num_labels = len(set(sent[-1] for sent in train_sentences)) + 1  # NOTE(review): not used again in this script
+
+# make train/dev/test datasets (dev and test are dictionaries keyed by corpus)
+train_dataset = Dataset(train_sentences)
+dev_dataset   = {corpus: Dataset(s) for corpus, s in dev_dict_sentences.items()}
+test_dataset  = {corpus: Dataset(s) for corpus, s in test_dict_sentences.items()}
+
+# Make datasets with batches and dataloader (only the training loader shuffles)
+train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
+dev_dict_dataloader = {corpus: DataLoader(dev_data, batch_size) 
+                        for corpus, dev_data in dev_dataset.items()}
+test_dict_dataloader = {corpus: DataLoader(test_data, batch_size) 
+                        for corpus, test_data in test_dataset.items()}
+
+
+# ===============
+print('\nModel: ', args.transformer_model)
+print('Batch size: ', args.batch_size * args.gradient_accumulation_steps)  # batch size multiplied by the gradient accumulation steps
+
+# ------- Testing ---------
+
+
+for corpus in dev_dict_dataloader:
+    dev_labels, dev_results = get_better_predictions(model,   # NOTE(review): `model` is never assigned in this script; it must come from `from utils import *` or be loaded first - confirm
+                                   corpus, 
+                                   dev_dict_dataloader[corpus], 
+                                   framework_labels[corpus.split('.')[1]]  # keyed by the second '.'-separated field of the corpus name
+                                  )
+    
+    
+    old_results = get_predictions(model,   # NOTE(review): old_results is not used afterwards
+                                corpus, 
+                                dev_dict_dataloader[corpus])
+    
+    print('new:', print_better_accuracies(dev_labels, dev_results))
+    
+# for corpus in test_dict_dataloader:
+#     test_results = get_better_predictions(model, 
+#                                        corpus, 
+#                                        test_dict_dataloader[corpus], 
+                                       
+#                                   )
+#     print_results_to_file(corpus, 
+#                           test_dict_sentences[corpus], 
+#                           test_results,
+#                           inv_mappings, 
+#                           substitutions_file)
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
-adapter-transformers==3.0.1
-certifi==2023.5.7
-charset-normalizer
-cmake==3.26.3
-datasets==2.4.0
-fsspec
-huggingface-hub==0.14.1
-idna==3.4
-Jinja2==3.1.2
-joblib==1.2.0
-lit==16.0.3
-MarkupSafe==2.1.2
-mpmath==1.3.0
-multidict==6.0.4
-multiprocess==0.70.13
-networkx==3.1
-packaging==23.1
-pandas==2.0.1
-Pillow==9.5.0
-pyarrow==12.0.0
-python-dateutil==2.8.2
-pytz==2023.3
-PyYAML==6.0
-regex==2023.5.5
-requests==2.30.0
-responses==0.18.0
-sacremoses==0.0.53
-scikit-learn==1.2.2
-scipy==1.10.1
-six==1.16.0
-sympy==1.12
-threadpoolctl==3.1.0
-tokenizers==0.12.1
-torch==2.0.1
-torchaudio==2.0.2
-torchvision
-tqdm==4.65.0
-transformers==4.18.0
-triton==2.0.0
-typing_extensions==4.5.0
-tzdata==2023.3
-urllib3==2.0.2
-xxhash==3.2.0
-yarl==1.9.2
\ No newline at end of file
--- a/run_stuff.sh
+++ b/run_stuff.sh
 #!/usr/bin/env bash

-#SBATCH --job-name=adapters
+#SBATCH --job-name=pytorch-models

 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=4
-#SBATCH --partition=RTX6000Node
+#SBATCH --partition=GPUNodes
 #SBATCH --gres=gpu:1


 # tests tests

-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 pytorch_classifier.py --batch_size 8 --num_epochs 10 --data_path '/users/melodi/emetheni/clean_data' --mappings_file 'mappings/mappings-classes-braud.tsv'
-
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 huggingface_classifier.py --batch_size 4 --gradient_accumulation_steps 32 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --mappings_file 'mappings/mappings-classes-braud.tsv'
-
-# Train the adapter:
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1;layer.2;layer.3' --mappings_file 'mappings/mappings-classes-braud.tsv'
-
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1;layer.2;layer.3;layer.4' --mappings_file 'mappings/mappings-classes-braud.tsv'
-
-srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1' --mappings_file 'mappings/mappings-classes-braud.tsv'
-
-# Run classifier with adapter for corpora:
-
-
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 2 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 3 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 4 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 5 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 6 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
-
-
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3  adapter_classifier.py --batch_size 8 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3  adapter_classifier.py --batch_size 8 --num_epochs 2 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3  adapter_classifier.py --batch_size 8 --num_epochs 3 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3  adapter_classifier.py --batch_size 8 --num_epochs 4 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3  adapter_classifier.py --batch_size 8 --num_epochs 5 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3  adapter_classifier.py --batch_size 8 --num_epochs 6 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
+srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 classifier_bare_pytorch.py \
+    --num_epochs 10 \
+    --data_path '/users/melodi/emetheni/clean_data' \
+    --mappings_file 'mappings/mappings_substitutions.tsv' 
+    
+    
+# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 classifier_bare_pytorch.py \
+#     --num_epochs 10 \
+#     --data_path '/users/melodi/emetheni/clean_data' \
+#     --mappings_file 'mappings/mappings_substitutions.tsv' \
+#     --transformer_model "xlm-roberta-base" \
+#     --batch_size 4 \
+#     --gradient_accumulation_steps 32
+    
+# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 classifier_bare_huggingface.py \
+#     --batch_size 4 \
+#     --gradient_accumulation_steps 32 \
+#     --num_epochs 1 \
+#     --data_path '/users/melodi/emetheni/clean_data' \
+#     --mappings_file 'mappings/mappings_substitutions.tsv' \
+#     --transformer_model "bert-base-multilingual-cased"
--- a/see_results.ipynb
+++ b/see_results.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import accuracy_score\n",
+    "import os, io\n",
+    "from collections import OrderedDict, Counter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "connectives = {\"elaboration\": [\"and\", \"also\", \"besides\", \"further\", \"furthermore\", \"too\", \"moreover\", \"in addition\", \"then\", \"of equal importance\", \"equally important\", \"another\", \"additionally\", \"also\", \"moreover\", \"furthermore\", \"again\", \"further\", \"then\", \"besides\", \"too\", \"similarly\", \"correspondingly\", \"indeed\", \"regarding\"], \n",
+    "\"time\": [\"next\", \"afterward\", \"finally\", \"later\", \"last\", \"lastly\", \"at last\", \"now\", \"subsequently\", \"then\", \"when\", \"soon\", \"thereafter\", \"after a short time\", \"the next week\", \"a minute later\", \"in the meantime\", \"meanwhile\", \"on the following day\", \"at length\", \"ultimately\", \"presently\"], \n",
+    "\"sequence\": [\"first\", \"second\", \"third\", \"finally\", \"hence\", \"next\", \"then\", \"from here on\", \"to begin with\", \"last of all\", \"after\", \"before\", \"as soon as\", \"in the end\", \"gradually\", \"when\", \"after\", \"after that\", \"afterwards\", \"next\", \"subsequently\", \"later (on)\", \"followed by\", \"to go on to\", \"finally\", \"another\", \"additionally\", \"finally moreover\", \"also\", \"subsequently\", \"eventually\", \"next\", \"then\"], \n",
+    "\"example\": [\"for example\", \"to illustrate\", \"for instance\", \"to be specific\", \"such as\", \"moreover\", \"furthermore\", \"just as important\", \"similarly\", \"in the same way\", \"for example\", \"for instance\", \"namely\", \"such as\", \"as follows\", \"as exemplified by\", \"such as\", \"including\", \"especially\", \"particularly\", \"in particular\", \"notably\", \"mainly\"], \n",
+    "\"result\": [\"as a result\", \"hence\", \"so\", \"accordingly\", \"as a consequence\", \"consequently\", \"thus\", \"since\", \"therefore\", \"for this reason\", \"because of this\", \"therefore\", \"accordingly\", \"as a result of\", \"the result is/results are\", \"the consequence is\", \"resulting from\", \"consequently\", \"it can be seen\", \"evidence illustrates that\", \"because of this\", \"thus\", \"hence\", \"for this reason\", \"owing to x\", \"this suggests that\", \"it follows that\", \"otherwise\", \"in that case\", \"that implies\", \"As a result\", \"therefore\", \"thus\"], \n",
+    "\"purpose\": [\"for this purpose\", \"with this in mind\", \"for this reason\"], \n",
+    "\"comparison\": [\"like\", \"in the same manner\", \"as so\", \"similarly\"], \n",
+    "\"contrast\": [\"but\", \"in contrast\", \"conversely\", \"however\", \"still\", \"nevertheless\", \"nonetheless\", \"yet\", \"and yet\", \"on the other hand\", \"on the contrary\", \"or\", \"in spite of this\", \"actually\", \"in fact\", \"whereas\", \"conversely\", \"in comparison\", \"by contrast\", \"in contrast\", \"contrasting\", \"alternatively\", \"although\", \"otherwise\", \"instead\"], \n",
+    "\"summary\": [\"in summary\", \"to sum up\", \"to repeat\", \"briefly\", \"in short\", \"finally\", \"on the whole\", \"therefore\", \"as I have said\", \"in conclusion\", \"as seen\", \"in conclusion\", \"therefore\", \"to conclude\", \"on the whole\", \"hence\", \"thus to summarise\", \"altogether\", \"overall\"], \n",
+    "\"rephrasing\": [\"in other terms\", \"rather\", \"or\", \"better\", \"in view of this\", \"in contrast\"]}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def parse_data(infile, string_input=False) -> tuple:\n",
+    "    \"\"\"\n",
+    "    Read a gold or a pred file and extract the sentence columns and the label column.\n",
+    "\n",
+    "    :param infile: shared task .rels file\n",
+    "    :param string_input: If True, files are replaced by strings with file contents (for import inside other scripts)\n",
+    "    :return: a tuple (sentences, labels); sentences is a list of\n",
+    "             (column 3, column 4, third-to-last column) tuples and labels is a list of the\n",
+    "             lowercased last column, both with one entry per data row\n",
+    "    \"\"\"\n",
+    "\n",
+    "    if not string_input:\n",
+    "        # read inside a context manager so the file handle is always closed\n",
+    "        with io.open(infile, encoding=\"utf-8\") as f:\n",
+    "            data = f.read().strip().replace(\"\\r\", \"\")\n",
+    "    else:\n",
+    "        data = infile.strip()\n",
+    "\n",
+    "    # keep the lines that contain a tab, skipping the first line (header)\n",
+    "    rows = [line.split(\"\\t\") \n",
+    "            for i, line in enumerate(data.split(\"\\n\")) if \"\\t\" in line and i>0]\n",
+    "\n",
+    "    labels = [row[-1].lower() for row in rows]\n",
+    "    \n",
+    "    sentences = [(row[3], row[4], row[-3]) for row in rows]\n",
+    "    \n",
+    "    return sentences, labels\n",
+    "\n",
+    "\n",
+    "def get_accuracy_score(gold_file, pred_file, string_input=False) -> dict:\n",
+    "\n",
+    "    _, gold_labels = parse_data(gold_file, string_input)\n",
+    "    _, pred_labels = parse_data(pred_file, string_input)\n",
+    "\n",
+    "    filename = gold_file.split(os.sep)[-1]\n",
+    "\n",
+    "    assert len(gold_labels) == len(pred_labels), \"FATAL: different number of labels detected in gold and pred\"\n",
+    "\n",
+    "    acc = accuracy_score(gold_labels, pred_labels)\n",
+    "\n",
+    "    score_dict = {\"filename\": filename,\n",
+    "                  \"acc_score\": round(acc, 4),\n",
+    "                  \"gold_rel_count\": len(gold_labels),\n",
+    "                  \"pred_rel_count\": len(pred_labels)}\n",
+    "\n",
+    "    return score_dict\n",
+    "\n",
+    "def separate_right_wrong(gold_file, pred_file, string_input=False):\n",
+    "    \n",
+    "    rights = []\n",
+    "    wrongs = []\n",
+    "    \n",
+    "    gold_sents, gold_labels = parse_data(gold_file, string_input)\n",
+    "    pred_sents, pred_labels = parse_data(pred_file, string_input)\n",
+    "    \n",
+    "    for n in range(len(gold_sents)):\n",
+    "        if gold_labels[n] == pred_labels[n]:\n",
+    "            rights.append([gold_sents[n], gold_labels[n], pred_labels[n]])\n",
+    "        else:\n",
+    "            wrongs.append([gold_sents[n], gold_labels[n], pred_labels[n]])\n",
+    "    \n",
+    "    return rights, wrongs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Print accuracies \n",
+    "\n",
+    "model = 'A_15-epochs_frozen-1_2'\n",
+    "corpus = 'eng.dep.covdtb'\n",
+    "\n",
+    "gold_path = '/users/melodi/emetheni/clean_data/'\n",
+    "results_path = 'results/test/' + model + '/'\n",
+    "\n",
+    "corpora = sorted([x[:-4] for x in os.listdir('results/test/' + model) \n",
+    "           if not \"DS\" in x if not 'ipy' in x])\n",
+    "\n",
+    "# for corpus in corpora:\n",
+    "#     score = get_accuracy_score(gold_path + corpus + '/' + corpus + '_test.rels', \n",
+    "#                                results_path + corpus + '.tsv')\n",
+    "\n",
+    "#     print(corpus, '\\t', score['acc_score'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Separate\n",
+    "\n",
+    "# model = 'A_15-epochs_frozen-1_2'\n",
+    "# corpus = 'eng.dep.covdtb'\n",
+    "\n",
+    "model = 'A_15-epochs_frozen-1-2-3_3'\n",
+    "corpus = 'eng.rst.gum'\n",
+    "\n",
+    "gold_path = '/users/melodi/emetheni/clean_data/'\n",
+    "results_path = 'results/test/' + model + '/'\n",
+    "\n",
+    "corpora = sorted([x[:-4] for x in os.listdir('results/test/' + model) \n",
+    "           if not \"DS\" in x if not 'ipy' in x])\n",
+    "\n",
+    "rights, wrongs = separate_right_wrong(gold_path + corpus + '/' + corpus + '_test.rels', \n",
+    "                           results_path + corpus + '.tsv')\n",
+    "\n",
+    "rights_count = dict(OrderedDict(Counter([x[-1] for x in rights])))\n",
+    "wrongs_count = dict(OrderedDict(Counter([x[-1] for x in wrongs])))\n",
+    "\n",
+    "# for label in sorted(set(list(rights_count.keys()) + list(wrongs_count.keys())), reverse=False):\n",
+    "#     if label in rights_count:\n",
+    "#         r = rights_count[label]\n",
+    "#     else:\n",
+    "#         r = 0\n",
+    "#     if label in wrongs_count:\n",
+    "#         w = wrongs_count[label]\n",
+    "#     else:\n",
+    "#         w = 0\n",
+    "#     print(label, '\\t', r, '\\t', w)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "rights 203 / 1657 0.12251056125528063\n",
+      "wrongs 71 / 918 0.07734204793028322\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Presence of connectives in right/wrong sents\n",
+    "\n",
+    "counter = 0\n",
+    "for sent in rights:\n",
+    "    sentence = (sent[0][0] + ' ' + sent[0][1]).lower()\n",
+    "    if sent[1] in connectives:\n",
+    "        if any(x in sentence for x in connectives[sent[1]]):\n",
+    "#             print(sent)\n",
+    "            counter += 1\n",
+    "print('rights', counter, '/', len(rights), counter/len(rights))\n",
+    "\n",
+    "counter = 0\n",
+    "for sent in wrongs:\n",
+    "    \n",
+    "    sentence = (sent[0][0] + ' ' + sent[0][1]).lower()\n",
+    "    if sent[1] in connectives:\n",
+    "        if any(x in sentence for x in connectives[sent[1]]):\n",
+    "#             print(sent)\n",
+    "            counter += 1\n",
+    "print('wrongs', counter, '/', len(wrongs), counter/len(wrongs))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "rights 1253 / 1657 0.756185878092939\n",
+      "wrongs 735 / 918 0.8006535947712419\n"
+     ]
+    }
+   ],
+   "source": [
+    "# See direction\n",
+    "\n",
+    "counter = 0\n",
+    "for sent in rights:\n",
+    "    if sent[0][2] == '1<2':\n",
+    "        counter += 1\n",
+    "print('rights', counter, '/', len(rights), counter/len(rights))\n",
+    "\n",
+    "counter = 0\n",
+    "for sent in wrongs:\n",
+    "    if sent[0][2] == '1<2':\n",
+    "        counter += 1\n",
+    "print('wrongs', counter, '/', len(wrongs), counter/len(wrongs))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[('The prevalence of discrimination across racial groups in contemporary America :',\n",
+       "   'The current study seeks to build on this research',\n",
+       "   '1>2'),\n",
+       "  'organization',\n",
+       "  'organization'],\n",
+       " [('The prevalence of discrimination across racial groups in contemporary America :',\n",
+       "   'Results from a nationally representative sample of adults',\n",
+       "   '1<2'),\n",
+       "  'elaboration',\n",
+       "  'elaboration'],\n",
+       " [('Introduction .',\n",
+       "   'The current study seeks to build on this research',\n",
+       "   '1>2'),\n",
+       "  'organization',\n",
+       "  'organization'],\n",
+       " [('Personal experiences of discrimination and bias have been the focus of much social science research .',\n",
+       "   'In many respects , researchers already possess a wealth of knowledge',\n",
+       "   '1>2'),\n",
+       "  'context',\n",
+       "  'context'],\n",
+       " [('Personal experiences of discrimination and bias have been the focus of much social science research .',\n",
+       "   '[ 1 - 3 ]',\n",
+       "   '1<2'),\n",
+       "  'explanation',\n",
+       "  'explanation'],\n",
+       " [('Sociologists have explored the adverse consequences of discrimination',\n",
+       "   '[ 3 – 5 ] ;',\n",
+       "   '1<2'),\n",
+       "  'explanation',\n",
+       "  'explanation'],\n",
+       " [('Sociologists have explored the adverse consequences of discrimination',\n",
+       "   'psychologists have examined the mental processes',\n",
+       "   '1<2'),\n",
+       "  'joint',\n",
+       "  'joint'],\n",
+       " [('psychologists have examined the mental processes',\n",
+       "   'that underpin conscious and unconscious biases',\n",
+       "   '1<2'),\n",
+       "  'elaboration',\n",
+       "  'elaboration'],\n",
+       " [('psychologists have examined the mental processes', '[ 6 ] ;', '1<2'),\n",
+       "  'explanation',\n",
+       "  'explanation'],\n",
+       " [('Sociologists have explored the adverse consequences of discrimination',\n",
+       "   'neuroscientists have examined the neurobiological underpinnings of discrimination',\n",
+       "   '1<2'),\n",
+       "  'joint',\n",
+       "  'joint']]"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rights[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subs = {\"Attribution\": [\"attribution\", \"attribution-negative\"],\n",
+    " \"Background\": [\"background\", \"circumstance\"],\n",
+    " \"Cause\": [\"cause\", \"result\", \"consequence\"],\n",
+    " \"Comparison\": [\"comparison\", \"preference\", \"analogy\", \"proportion\"],\n",
+    " \"Condition\": [\"condition\", \"hypothetical\", \"contingency\", \"otherwise\"],\n",
+    " \"Contrast\": [\"contrast\", \"concession\", \"antithesis\"],\n",
+    " \"Elaboration\": [\"elaboration-additional\", \"elaboration-general-specific\", \"elaboration-part-whole\", \"elaboration-process-step\", \"elaboration-object-attribute\", \"elaboration-set-member\", \"example\", \"definition\"],\n",
+    " \"Enablement\": [\"purpose\", \"enablement\"],\n",
+    " \"Evaluation\": [\"evaluation\", \"interpretation\", \"conclusion\", \"comment\"],\n",
+    " \"Explanation\": [\"evidence\", \"explanation-argumentative\", \"reason\"],\n",
+    " \"Joint\": [\"list\", \"disjunction\"],\n",
+    " \"Manner-Means\": [\"manner\", \"means\"],\n",
+    " \"Topic-Comment\": [\"problem-solution\", \"question-answer\", \"statement-response\", \"topic-comment\", \"comment-topic\", \"rhetorical-question\"],\n",
+    " \"Summary\": [\"summary\", \"restatement\"],\n",
+    " \"Temporal\": [\"temporal-before\", \"temporal-after\", \"temporal-same-time\", \"sequence\", \"inverted-sequence\"],\n",
+    " \"Topic Change\": [\"topic-shift\", \"topic-drift\"]}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rst = [\"bg-general\", \"elab-addition\", \"manner-means\", \"attribution\", \"evaluation\", \"enablement\", \"elab-aspect\", \"joint\", \"temporal\", \"result\", \"bg-goal\", \"progression\", \"contrast\", \"elab-process_step\", \"elab-enumember\", \"comparison\", \"cause\", \"exp-reason\", \"exp-evidence\", \"condition\", \"summary\", \"bg-compare\", \"elab-example\", \"elab-definition\", \"cause-result\", \"findings\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bg-general \t \n",
+      "elab-addition \t \n",
+      "manner-means \t manner-means\n",
+      "attribution \t attribution\n",
+      "evaluation \t evaluation\n",
+      "enablement \t enablement\n",
+      "elab-aspect \t \n",
+      "joint \t \n",
+      "temporal \t \n",
+      "result \t cause\n",
+      "bg-goal \t \n",
+      "progression \t \n",
+      "contrast \t contrast\n",
+      "elab-process_step \t \n",
+      "elab-enumember \t \n",
+      "comparison \t comparison\n",
+      "cause \t cause\n",
+      "exp-reason \t explanation\n",
+      "exp-evidence \t explanation\n",
+      "condition \t condition\n",
+      "summary \t summary\n",
+      "bg-compare \t \n",
+      "elab-example \t elaboration\n",
+      "elab-definition \t elaboration\n",
+      "cause-result \t cause\n",
+      "findings \t \n"
+     ]
+    }
+   ],
+   "source": [
+    "for label in rst:\n",
+    "    temp = ''\n",
+    "    for k, v in subs.items():\n",
+    "        if label in v:\n",
+    "            temp = k.lower()\n",
+    "        elif '-' in label:\n",
+    "            for l in label.split('-'):\n",
+    "                if l in v:\n",
+    "                    temp = temp = k.lower()\n",
+    "        elif '.' in label:\n",
+    "            for l in label.split('.'):\n",
+    "                if l in v:\n",
+    "                    temp = temp = k.lower()\n",
+    "        \n",
+    "    print(label, '\\t', temp)\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
+%% Cell type:code id: tags:
+
+``` python
+from sklearn.metrics import accuracy_score
+import os, io
+from collections import OrderedDict, Counter
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Cue phrases grouped by relation label. The "Presence of connectives" cell
+# looks the gold label of an example up in this dict and tests every phrase
+# as a plain substring of the lower-cased text of both units.
+# NOTE(review): since that text is lower-cased, entries containing capitals
+# (e.g. "As a result") can never match; repeated entries inside a list do not
+# change the result because the phrases are combined with any().
+connectives = {"elaboration": ["and", "also", "besides", "further", "furthermore", "too", "moreover", "in addition", "then", "of equal importance", "equally important", "another", "additionally", "also", "moreover", "furthermore", "again", "further", "then", "besides", "too", "similarly", "correspondingly", "indeed", "regarding"],
+"time": ["next", "afterward", "finally", "later", "last", "lastly", "at last", "now", "subsequently", "then", "when", "soon", "thereafter", "after a short time", "the next week", "a minute later", "in the meantime", "meanwhile", "on the following day", "at length", "ultimately", "presently"],
+"sequence": ["first", "second", "third", "finally", "hence", "next", "then", "from here on", "to begin with", "last of all", "after", "before", "as soon as", "in the end", "gradually", "when", "after", "after that", "afterwards", "next", "subsequently", "later (on)", "followed by", "to go on to", "finally", "another", "additionally", "finally moreover", "also", "subsequently", "eventually", "next", "then"],
+"example": ["for example", "to illustrate", "for instance", "to be specific", "such as", "moreover", "furthermore", "just as important", "similarly", "in the same way", "for example", "for instance", "namely", "such as", "as follows", "as exemplified by", "such as", "including", "especially", "particularly", "in particular", "notably", "mainly"],
+"result": ["as a result", "hence", "so", "accordingly", "as a consequence", "consequently", "thus", "since", "therefore", "for this reason", "because of this", "therefore", "accordingly", "as a result of", "the result is/results are", "the consequence is", "resulting from", "consequently", "it can be seen", "evidence illustrates that", "because of this", "thus", "hence", "for this reason", "owing to x", "this suggests that", "it follows that", "otherwise", "in that case", "that implies", "As a result", "therefore", "thus"],
+"purpose": ["for this purpose", "with this in mind", "for this reason"],
+"comparison": ["like", "in the same manner", "as so", "similarly"],
+"contrast": ["but", "in contrast", "conversely", "however", "still", "nevertheless", "nonetheless", "yet", "and yet", "on the other hand", "on the contrary", "or", "in spite of this", "actually", "in fact", "whereas", "conversely", "in comparison", "by contrast", "in contrast", "contrasting", "alternatively", "although", "otherwise", "instead"],
+"summary": ["in summary", "to sum up", "to repeat", "briefly", "in short", "finally", "on the whole", "therefore", "as I have said", "in conclusion", "as seen", "in conclusion", "therefore", "to conclude", "on the whole", "hence", "thus to summarise", "altogether", "overall"],
+"rephrasing": ["in other terms", "rather", "or", "better", "in view of this", "in contrast"]}
+```
+
+%% Cell type:code id: tags:
+
+``` python
+def parse_data(infile, string_input=False) -> list:
+    """
+    This function is to read a gold or a pred file to obtain the label column for accuracy calculation.
+
+    :param infile: shared task .rels file
+    :param string_input: If True, files are replaced by strings with file contents (for import inside other scripts)
+    :return: a list of labels
+    """
+
+    if not string_input:
+        data = io.open(infile, encoding="utf-8").read().strip().replace("\r", "")
+    else:
+        data = infile.strip()
+
+    labels = [line.split("\t")[-1].lower()
+              for i, line in enumerate(data.split("\n")) if "\t" in line and i>0]
+
+    sentences = [(line.split("\t")[3], line.split("\t")[4], line.split("\t")[-3])
+                 for i, line in enumerate(data.split("\n")) if "\t" in line and i>0]
+
+    return sentences, labels
+
+
+def get_accuracy_score(gold_file, pred_file, string_input=False) -> dict:
+
+    _, gold_labels = parse_data(gold_file, string_input)
+    _, pred_labels = parse_data(pred_file, string_input)
+
+    filename = gold_file.split(os.sep)[-1]
+
+    assert len(gold_labels) == len(pred_labels), "FATAL: different number of labels detected in gold and pred"
+
+    acc = accuracy_score(gold_labels, pred_labels)
+
+    score_dict = {"filename": filename,
+                  "acc_score": round(acc, 4),
+                  "gold_rel_count": len(gold_labels),
+                  "pred_rel_count": len(pred_labels)}
+
+    return score_dict
+
+def separate_right_wrong(gold_file, pred_file, string_input=False):
+
+    rights = []
+    wrongs = []
+
+    gold_sents, gold_labels = parse_data(gold_file, string_input)
+    pred_sents, pred_labels = parse_data(pred_file, string_input)
+
+    for n in range(len(gold_sents)):
+        if gold_labels[n] == pred_labels[n]:
+            rights.append([gold_sents[n], gold_labels[n], pred_labels[n]])
+        else:
+            wrongs.append([gold_sents[n], gold_labels[n], pred_labels[n]])
+
+    return rights, wrongs
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Print accuracies
+
+model = 'A_15-epochs_frozen-1_2'
+corpus = 'eng.dep.covdtb'
+
+gold_path = '/users/melodi/emetheni/clean_data/'
+results_path = 'results/test/' + model + '/'
+
+corpora = sorted([x[:-4] for x in os.listdir('results/test/' + model)
+           if not "DS" in x if not 'ipy' in x])
+
+# for corpus in corpora:
+#     score = get_accuracy_score(gold_path + corpus + '/' + corpus + '_test.rels',
+#                                results_path + corpus + '.tsv')
+
+#     print(corpus, '\t', score['acc_score'])
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Separate
+
+# model = 'A_15-epochs_frozen-1_2'
+# corpus = 'eng.dep.covdtb'
+
+model = 'A_15-epochs_frozen-1-2-3_3'
+corpus = 'eng.rst.gum'
+
+gold_path = '/users/melodi/emetheni/clean_data/'
+results_path = 'results/test/' + model + '/'
+
+corpora = sorted([x[:-4] for x in os.listdir('results/test/' + model)
+           if not "DS" in x if not 'ipy' in x])
+
+rights, wrongs = separate_right_wrong(gold_path + corpus + '/' + corpus + '_test.rels',
+                           results_path + corpus + '.tsv')
+
+rights_count = dict(OrderedDict(Counter([x[-1] for x in rights])))
+wrongs_count = dict(OrderedDict(Counter([x[-1] for x in wrongs])))
+
+# for label in sorted(set(list(rights_count.keys()) + list(wrongs_count.keys())), reverse=False):
+#     if label in rights_count:
+#         r = rights_count[label]
+#     else:
+#         r = 0
+#     if label in wrongs_count:
+#         w = wrongs_count[label]
+#     else:
+#         w = 0
+#     print(label, '\t', r, '\t', w)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Presence of connectives in right/wrong sents
+
+counter = 0
+for sent in rights:
+    sentence = (sent[0][0] + ' ' + sent[0][1]).lower()
+    if sent[1] in connectives:
+        if any(x in sentence for x in connectives[sent[1]]):
+#             print(sent)
+            counter += 1
+print('rights', counter, '/', len(rights), counter/len(rights))
+
+counter = 0
+for sent in wrongs:
+
+    sentence = (sent[0][0] + ' ' + sent[0][1]).lower()
+    if sent[1] in connectives:
+        if any(x in sentence for x in connectives[sent[1]]):
+#             print(sent)
+            counter += 1
+print('wrongs', counter, '/', len(wrongs), counter/len(wrongs))
+```
+
+%% Output
+
+    rights 203 / 1657 0.12251056125528063
+    wrongs 71 / 918 0.07734204793028322
+
+%% Cell type:code id: tags:
+
+``` python
+# See direction
+
+counter = 0
+for sent in rights:
+    if sent[0][2] == '1<2':
+        counter += 1
+print('rights', counter, '/', len(rights), counter/len(rights))
+
+counter = 0
+for sent in wrongs:
+    if sent[0][2] == '1<2':
+        counter += 1
+print('wrongs', counter, '/', len(wrongs), counter/len(wrongs))
+```
+
+%% Output
+
+    rights 1253 / 1657 0.756185878092939
+    wrongs 735 / 918 0.8006535947712419
+
+%% Cell type:code id: tags:
+
+``` python
+rights[:10]
+```
+
+%% Output
+
+    [[('The prevalence of discrimination across racial groups in contemporary America :',
+       'The current study seeks to build on this research',
+       '1>2'),
+      'organization',
+      'organization'],
+     [('The prevalence of discrimination across racial groups in contemporary America :',
+       'Results from a nationally representative sample of adults',
+       '1<2'),
+      'elaboration',
+      'elaboration'],
+     [('Introduction .',
+       'The current study seeks to build on this research',
+       '1>2'),
+      'organization',
+      'organization'],
+     [('Personal experiences of discrimination and bias have been the focus of much social science research .',
+       'In many respects , researchers already possess a wealth of knowledge',
+       '1>2'),
+      'context',
+      'context'],
+     [('Personal experiences of discrimination and bias have been the focus of much social science research .',
+       '[ 1 - 3 ]',
+       '1<2'),
+      'explanation',
+      'explanation'],
+     [('Sociologists have explored the adverse consequences of discrimination',
+       '[ 3 – 5 ] ;',
+       '1<2'),
+      'explanation',
+      'explanation'],
+     [('Sociologists have explored the adverse consequences of discrimination',
+       'psychologists have examined the mental processes',
+       '1<2'),
+      'joint',
+      'joint'],
+     [('psychologists have examined the mental processes',
+       'that underpin conscious and unconscious biases',
+       '1<2'),
+      'elaboration',
+      'elaboration'],
+     [('psychologists have examined the mental processes', '[ 6 ] ;', '1<2'),
+      'explanation',
+      'explanation'],
+     [('Sociologists have explored the adverse consequences of discrimination',
+       'neuroscientists have examined the neurobiological underpinnings of discrimination',
+       '1<2'),
+      'joint',
+      'joint']]
+
+%% Cell type:code id: tags:
+
+``` python
+# Coarse relation class -> list of fine-grained labels that belong to it.
+# The lookup loop further down reports the lower-cased key for each label it
+# finds in one of these lists.
+# NOTE(review): looks like the usual RST coarse-class grouping - confirm source.
+subs = {"Attribution": ["attribution", "attribution-negative"],
+ "Background": ["background", "circumstance"],
+ "Cause": ["cause", "result", "consequence"],
+ "Comparison": ["comparison", "preference", "analogy", "proportion"],
+ "Condition": ["condition", "hypothetical", "contingency", "otherwise"],
+ "Contrast": ["contrast", "concession", "antithesis"],
+ "Elaboration": ["elaboration-additional", "elaboration-general-specific", "elaboration-part-whole", "elaboration-process-step", "elaboration-object-attribute", "elaboration-set-member", "example", "definition"],
+ "Enablement": ["purpose", "enablement"],
+ "Evaluation": ["evaluation", "interpretation", "conclusion", "comment"],
+ "Explanation": ["evidence", "explanation-argumentative", "reason"],
+ "Joint": ["list", "disjunction"],
+ "Manner-Means": ["manner", "means"],
+ "Topic-Comment": ["problem-solution", "question-answer", "statement-response", "topic-comment", "comment-topic", "rhetorical-question"],
+ "Summary": ["summary", "restatement"],
+ "Temporal": ["temporal-before", "temporal-after", "temporal-same-time", "sequence", "inverted-sequence"],
+ "Topic Change": ["topic-shift", "topic-drift"]}
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Labels to look up in `subs` in the next cell.
+# NOTE(review): presumably the label inventory of a single corpus - confirm which one.
+rst = ["bg-general", "elab-addition", "manner-means", "attribution", "evaluation", "enablement", "elab-aspect", "joint", "temporal", "result", "bg-goal", "progression", "contrast", "elab-process_step", "elab-enumember", "comparison", "cause", "exp-reason", "exp-evidence", "condition", "summary", "bg-compare", "elab-example", "elab-definition", "cause-result", "findings"]
+```
+
+%% Cell type:code id: tags:
+
+``` python
+for label in rst:
+    temp = ''
+    for k, v in subs.items():
+        if label in v:
+            temp = k.lower()
+        elif '-' in label:
+            for l in label.split('-'):
+                if l in v:
+                    temp = temp = k.lower()
+        elif '.' in label:
+            for l in label.split('.'):
+                if l in v:
+                    temp = temp = k.lower()
+
+    print(label, '\t', temp)
+
+```
+
+%% Output
+
+    bg-general
+    elab-addition
+    manner-means 	 manner-means
+    attribution 	 attribution
+    evaluation 	 evaluation
+    enablement 	 enablement
+    elab-aspect
+    joint
+    temporal
+    result 	 cause
+    bg-goal
+    progression
+    contrast 	 contrast
+    elab-process_step
+    elab-enumember
+    comparison 	 comparison
+    cause 	 cause
+    exp-reason 	 explanation
+    exp-evidence 	 explanation
+    condition 	 condition
+    summary 	 summary
+    bg-compare
+    elab-example 	 elaboration
+    elab-definition 	 elaboration
+    cause-result 	 cause
+    findings
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/utils.py
+++ b/utils.py
 #!/usr/bin/env python
 # coding: utf-8

-import os
+import os, io
 import torch
 from transformers import AutoConfig, AutoTokenizer
 from configure import parse_args
@@ -66,12 +66,81 @@ def open_file(filename, mappings_dict):
    return lines


+def open_file_with_lang(filename, mappings_dict):   
+    
+    ''' Same as above, but first token is language '''
+    
+    max_len = 254 # 512 (max bert len) / 2 (2 sents) -2 (special tokens)
+    lines = []
+    SEP_token = '[SEP]'
+    
+    langs = {'deu':'German', 
+            'eng':'English',
+            'eus': 'Basque',
+            'fas':'Farsi',
+            'fra':'French', 
+            'ita':'Italian', 
+            'nld':'Dutch',
+            'por':'Portuguese', 
+            'rus': 'Russian', 
+            'spa': 'Spanish', 
+            'tur': 'Turkish',
+            'tha': 'Thai', 
+            'zho': 'Chinese'
+            }
+    
+    lenguas = {'deu':'Deutsch', 
+            'eng':'English',
+            'eus': 'Euskara',
+            'fas':'فارسی',
+            'fra':'Français', 
+            'ita':'Italiano', 
+            'nld':'Nederlands',
+            'por':'Português', 
+            'rus': 'русский', 
+            'spa': 'español', 
+            'tur': 'Türkçe',
+            'tha': 'ภาษาไทย', 
+            'zho': '中文'
+            }
+    
+    with open(filename, 'r', encoding='utf-8') as f:
+        next(f)
+        
+        lang = langs[filename.split('/')[-2].split('.')[0]]
+        framework = filename.split('/')[-2].split('.')[1]
+        fullname = filename.split('/')[-2]
+        
+        for line in f:
+            l = line.strip().split('\t')
+            
+            if len(l) > 1:
+                # chop the sentences to max_len if too long
+                sent_1 = l[3].split(' ')
+                sent_2 = l[4].split(' ')      
+                
+                if len(sent_1) > max_len:
+                    sent_1 = sent_1[:max_len]
+                if len(sent_2) > max_len:
+                    sent_2 = sent_2[:max_len]
+                
+                # flip them if different direction
+                if args.normalize_direction == 'yes':
+                    if l[9] == '1>2':
+                        lines.append(l + [[lang, fullname] + sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]])
+                    else:
+                        lines.append(l + [[lang, fullname] + sent_2 + [SEP_token] + sent_1, mappings_dict[l[11].lower()]])
+                else:
+                    lines.append(l + [[lang, fullname] + sent_1 + [SEP_token] + sent_2, mappings[l[11].lower()]])
+
+    return lines
+
 def encode_batch(batch):
    
    """ Encodes a batch of input data using the model tokenizer.
        Works for a pandas DF column, instead of a list.
    """
-    tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)
+    tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')
    return tokenizer(batch["text"], 
                     max_length=512, 
                     truncation=True, 
@@ -85,6 +154,7 @@ def open_sentences(path_to_corpora, mappings_dict):
        - list of sentences for TRAIN: all the corpora and surprise corpora together
        - dict of sentences for DEV: each dev set categorized per corpus
        - dict of sentences for TEST: each test set categorized per corpus
+        - ** NEW ** : dict of labels per framework
    '''
    
    corpora = [folder for folder in os.listdir(path_to_corpora) 
@@ -94,15 +164,79 @@ def open_sentences(path_to_corpora, mappings_dict):
    train_sentences     = []
    dev_dict_sentences  = {}
    test_dict_sentences = {}
+    
+    all_labels = {}

    for corpus in corpora:
+        framework = corpus.split('.')[-2]
+        if not framework in all_labels:
+            all_labels[framework] = []
        
+        # ===== open train ====
        try:
            # open normal files   
            train_file = ['/'.join([path_to_corpora, corpus, x])
                              for x in os.listdir(path_to_corpora + '/' + corpus) 
                              if 'train' in x and 'rels' in x][0]
-            train_sentences += open_file(train_file, mappings_dict)
+            temp = open_file(train_file, mappings_dict)
+            # train_sentences += open_file_with_lang(train_file, mappings_dict)
+            train_sentences += temp
+            all_labels[framework] += [l[-1] for l in temp]
+
+        except: # some of them don't have train
+            pass
+        
+        # ======== open dev ========
+        dev_dict_sentences[corpus] = []
+        dev_file = ['/'.join([path_to_corpora,corpus,x])
+                              for x in os.listdir(path_to_corpora + '/' + corpus) 
+                              if 'dev' in x and 'rels' in x][0] 
+        temp = open_file(dev_file, mappings_dict)
+        dev_dict_sentences[corpus] += temp
+        all_labels[framework] += [l[-1] for l in temp]  
+#         dev_dict_sentences[corpus] += open_file_with_lang(dev_file, mappings_dict)
+
+        # ======== open test ========
+        test_dict_sentences[corpus] = []
+        test_file = ['/'.join([path_to_corpora,corpus,x])
+                              for x in os.listdir(path_to_corpora + '/' + corpus) 
+                              if 'test' in x and 'rels' in x][0] 
+        temp = open_file(test_file, mappings_dict)
+        test_dict_sentences[corpus] += temp
+        all_labels[framework] += [l[-1] for l in temp]  
+#         test_dict_sentences[corpus] += open_file_with_lang(test_file, mappings_dict)
+
+    labels = {framework:set(all_labels[framework]) for framework in all_labels}
+    
+    return train_sentences, dev_dict_sentences, test_dict_sentences, labels
+
+
+def open_sentences_with_lang(path_to_corpora, mappings_dict):
+    ''' Opens all the corpora and the surprise corpora in train/dev/test sets.
+        Uses the open_file() function from utils.
+        Returns:
+        - list of sentences for TRAIN: all the corpora and surprise corpora together
+        - dict of sentences for DEV: each dev set categorized per corpus
+        - dict of sentences for TEST: each test set categorized per corpus
+    '''
+    
+    corpora = [folder for folder in os.listdir(path_to_corpora) 
+               if not any(i in folder for i in ['.md', 'DS_', 'utils', 'ipynb'])]
+               
+    # ---------------------
+    train_sentences     = []
+    dev_dict_sentences  = {}
+    test_dict_sentences = {}
+
+    for corpus in corpora:
+        
+        try:
+            # open normal files   
+            train_file = ['/'.join([path_to_corpora, corpus, x])
+                              for x in os.listdir(path_to_corpora + '/' + corpus) 
+                              if 'train' in x and 'rels' in x][0]
+            train_sentences += open_file_with_lang(train_file, mappings_dict)
+#             train_sentences += open_file_with_lang(train_file, mappings_dict)
        except: # some of them don't have train
            pass

@@ -111,19 +245,22 @@ def open_sentences(path_to_corpora, mappings_dict):
        dev_file = ['/'.join([path_to_corpora,corpus,x])
                              for x in os.listdir(path_to_corpora + '/' + corpus) 
                              if 'dev' in x and 'rels' in x][0] 
-        dev_dict_sentences[corpus] += open_file(dev_file, mappings_dict)
+        dev_dict_sentences[corpus] += open_file_with_lang(dev_file, mappings_dict)
+#         dev_dict_sentences[corpus] += open_file_with_lang(dev_file, mappings_dict)

        #open each test separately
        test_dict_sentences[corpus] = []
        test_file = ['/'.join([path_to_corpora,corpus,x])
                              for x in os.listdir(path_to_corpora + '/' + corpus) 
                              if 'test' in x and 'rels' in x][0] 
-        test_dict_sentences[corpus] += open_file(test_file, mappings_dict)
+        test_dict_sentences[corpus] += open_file_with_lang(test_file, mappings_dict)
+#         test_dict_sentences[corpus] += open_file_with_lang(test_file, mappings_dict)

    
    return train_sentences, dev_dict_sentences, test_dict_sentences


+
 # ===============
 # Testing functions
 # ===============
@@ -186,15 +323,72 @@ def get_predictions_huggingface(trainer,
    '''

    results = trainer.predict(test_set)
-    preds = np.argmax(results.predictions, axis=1)
+    preds = np.softmax(results.predictions, axis=1)
+    top_preds = np.argmax(results.predictions, axis=1)
    results = results.label_ids
-    test_acc = round(accuracy_score(preds, results), 4)
+    test_acc = round(accuracy_score(top_preds, results), 4)
    
    if print_results:
-        print(corpus, '\tAccuracy:\t', test_acc, '\n')
+        print(corpus, '\t', test_acc, '\n')
    
    return preds
+
+
+def get_better_predictions(model,
+                            corpus, 
+                            test_dataloader, 
+                            corpus_labels,
+                            print_results=True):
+    ''' Evaluates a model on one test set, keeping for every example
+        only the predictions whose label is valid for the corpus.
+
+        The model output of each batch is passed through softmax, and 
+        the classes that are not in `corpus_labels` are dropped. The 
+        remaining probabilities are not re-normalised.
+
+        Args:
+        - model: called as model(input_ids, attention_mask). Its return
+          value is used directly as a tensor with one row of class 
+          scores per example.
+          NOTE(review): presumably raw logits -- confirm with the model.
+        - corpus: name of the corpus (currently unused here)
+        - test_dataloader: yields (test_input, test_label) batches, where
+          test_input provides 'attention_mask' and 'input_ids'
+        - corpus_labels: container with the class ids allowed in the corpus
+        - print_results: if True, prints the accuracy
+
+        Returns:
+        - list of gold label ids
+        - list of dicts {class id: probability}, one per example, 
+          ordered by increasing probability
+
+        Reads the module-level `args` (args.use_cuda) to pick the device.
+    '''
+
+    device = torch.device("cuda" if args.use_cuda else "cpu")
+
+    if args.use_cuda:
+        model = model.cuda()
+
+    model.eval()
+    all_labels = []
+    all_preds = []
+
+    with torch.no_grad():
+        for test_input, test_label in test_dataloader:
+
+            mask = test_input['attention_mask'].to(device)
+            input_id = test_input['input_ids'].squeeze(1).to(device)
+            output = model(input_id, mask)
+
+            for p in output.softmax(dim=-1).tolist():
+                # rank the classes by increasing probability, then keep
+                # only the classes that exist in this corpus
+                ranked = sorted(enumerate(p), key=lambda item: item[1])
+                all_preds.append({pred_label: prob
+                                  for pred_label, prob in ranked
+                                  if pred_label in corpus_labels})
+
+            all_labels += test_label.to('cpu').numpy().tolist()
+
+    # get the top predictions in order to get the acc
+    # (on equal probabilities the larger class id wins)
+    top_preds = [max(zip(probs.values(), probs.keys()))[1]
+                 for probs in all_preds]
+
+    # compare against the collected gold labels; the previous code used
+    # an undefined name `labels` here
+    test_acc = round(accuracy_score(all_labels, top_preds), 4)
+
+    if print_results:
+        print('better:', '\t', test_acc)
    
+    return all_labels, all_preds
+
    
 def print_results_to_file(corpus, 
                          test_sentences, 
@@ -256,4 +450,57 @@ def print_results_to_file(corpus,
        f.write(header + '\n')
        for line in results_to_write:
            f.write('\t'.join([str(x) for x in line]))
-            f.write('\n')
\ No newline at end of file
+            f.write('\n')
+            
+# ----------
+# janet's functions, modified
+
+
+def parse_data(infile, string_input=False) -> list:
+    """
+    This function is to read a gold or a pred file to obtain the label column for accuracy calculation.
+
+    The first line is treated as a header and skipped, as is any line without a tab.
+    The label is the last tab-separated field of a line, lower-cased.
+
+    :param infile: shared task .rels file (a path), or the file contents if string_input is True
+    :param string_input: If True, files are replaced by strings with file contents (for import inside other scripts)
+    :return: a list of labels
+    """
+
+    if not string_input:
+        # context manager so the handle is closed even if reading fails
+        with io.open(infile, encoding="utf-8") as handle:
+            data = handle.read()
+    else:
+        data = infile
+
+    # normalise both kinds of input the same way, so that CRLF line
+    # endings never leave a trailing "\r" on a label
+    data = data.strip().replace("\r", "")
+
+    ### to change for safety .lower()
+    labels = [line.split("\t")[-1].lower()
+              for i, line in enumerate(data.split("\n")) if "\t" in line and i > 0]
+
+    return labels
+
+
+def get_accuracy_score(gold_labels, pred_labels):
+    """
+    This function computes the accuracy score from gold and predicted labels
+    that have already been extracted (e.g. with parse_data).
+
+    :param gold_labels: list of gold labels
+    :param pred_labels: list of predicted labels, aligned with gold_labels
+    :return: dictionary of scores for printing, with the keys
+             "acc_score", "gold_rel_count" and "pred_rel_count"
+    :raises AssertionError: if the two lists have different lengths
+    """
+
+    # raised explicitly (instead of an assert statement) so that the check
+    # is still performed when Python runs with -O
+    if len(gold_labels) != len(pred_labels):
+        raise AssertionError("FATAL: different number of labels detected in gold and pred")
+
+    acc = accuracy_score(gold_labels, pred_labels)
+
+    score_dict = {"acc_score": acc,
+                  "gold_rel_count": len(gold_labels),
+                  "pred_rel_count": len(pred_labels)}
+
+    return score_dict