diff --git a/classifier_bare_huggingface.py b/classifier_bare_huggingface.py
index 22c0509031653b1a9978438fa2d78758ab5b7a5f..bf1e7203cb8a0c1e10a6de3021546907259d03b7 100644
--- a/classifier_bare_huggingface.py
+++ b/classifier_bare_huggingface.py
@@ -16,7 +16,7 @@ from utils import *
 
 device = torch.device("cuda")
 
-print('\n\nwith Language token - eng + Corpus (no framework) \n')
+# print('\n\nwith Language token - eng + Corpus (no framework) \n')
 
 # ---------------------------------------------------------------------------------------------------
 args = parse_args()
@@ -41,10 +41,6 @@ mappings, inv_mappings = open_mappings(args.mappings_file)
 # Open sentences
 train_sentences, dev_dict_sentences, test_dict_sentences, _ = open_sentences(args.data_path, mappings)
 
-print('\nCheck encodings:\n')
-print(train_sentences[0])
-
-
 # make pandas dataframes
 file_header = ['text', 'labels']
 
diff --git a/classifier_bare_pytorch.py b/classifier_bare_pytorch.py
index b1ee85aa96ec57acf0ccd90cac805908b146d2b5..d802a097c6e1a086d27dfb9603945fb9caa870c0 100644
--- a/classifier_bare_pytorch.py
+++ b/classifier_bare_pytorch.py
@@ -26,7 +26,7 @@ substitutions_file = 'mappings/substitutions.txt'
 # mapping_classes = args.mappings_file[:-4].split('-')[-1]
 # specific_results = open_specific_results('mappings/specific_results.txt')['B']
 
-print('ZERO-SHOT LANG: '+ args.langs_to_use)
+print('\nlangs to use: ' + args.langs_to_use + '\n')
 
 set_seed(42)
 torch.manual_seed(42)
@@ -80,8 +80,6 @@ tokenizer  = AutoTokenizer.from_pretrained(args.transformer_model)
 
 train_sentences, dev_dict_sentences, test_dict_sentences, framework_labels = open_sentences(args.data_path, mappings)
 
-
-print(framework_labels, flush=True)
 # Determine linear size (= number of classes in the sets + 1)
 num_labels = len(set(sent[-1] for sent in train_sentences)) + 1
 
diff --git a/configure.py b/configure.py
index 0eb23b91d67ac8a936c1305be187fc29ab18d8a0..52ecdbdfc3af336e742e2c848482049c7e619bdf 100644
--- a/configure.py
+++ b/configure.py
@@ -53,7 +53,7 @@ def parse_args():
                         help="Change order of sentences when the direction of relations is 1<2 to 2>1.") 
     
     # only specific languages/corpora
-    parser.add_argument("--langs_to_use", default='', type=str, 
+    parser.add_argument("--langs_to_use", default='@', type=str, 
                         help="List of languages/corpora to use, a str separated by ;")   
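+    # NOTE: '@' is presumably a sentinel that matches no corpus folder name,
+    # so nothing is selected unless --langs_to_use is given explicitly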
     
             
diff --git a/get_predictions.sh b/get_predictions.sh
deleted file mode 100644
index 8c5f7ddf38399331949c3787fe395c9506883863..0000000000000000000000000000000000000000
--- a/get_predictions.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-
-#SBATCH --job-name=model-LC
-
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=4
-#SBATCH --partition=GPUNodes
-#SBATCH --gres=gpu:1
-
-
-# tests tests
-
-
-# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 xml-roberta-classifier.py \
-#     --batch_size 4 \
-#     --gradient_accumulation_steps 32 \
-#     --num_epochs 6 \
-#     --data_path '/users/melodi/emetheni/clean_data' \
-#     --mappings_file 'mappings/mappings_substitutions.tsv' \
-#     --transformer_model "xlm-roberta-base"
-   
-    
-srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 open_finetuned_model.py \
-    --data_path '/users/melodi/emetheni/clean_data' \
-    --mappings_file 'mappings/mappings_substitutions.tsv' \
-    --transformer_model 'results/models/run_xlm-roberta-base/checkpoint-13500'
diff --git a/make_mappings_zero-shot.py b/make_mappings_zero-shot.py
index 14b05a6b31b50e80c52394a7f32391f6aa16f124..c2fd034201d102175663a529dbb948217577094d 100644
--- a/make_mappings_zero-shot.py
+++ b/make_mappings_zero-shot.py
@@ -29,23 +29,23 @@ for label, num in mappings.items():
         
 
 # -----------------------------------
-# define which language to NOT use with the arguments
-not_language = args.langs_to_use
+# define which languages to use, from the arguments
+languages = args.langs_to_use.split(';')    # e.g. 'eng;por' -> ['eng', 'por']
 
 
-corpora = [folder for folder in os.listdir(args.data_path) 
-           if not not_language in folder]
+corpora = [folder 
+           for folder in os.listdir(args.data_path) 
+           if any(l in folder for l in languages)]
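+# e.g. --langs_to_use 'por' keeps every folder whose name contains 'por'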
 
 files = ['/'.join([args.data_path, corpus, f])
          for corpus in corpora
-         for f in os.listdir(args.data_path + '/' + corpus)
-        ]
+         for f in os.listdir(args.data_path + '/' + corpus)]
 
 # open the files
 def read_file(file):
     ''' Open the relations file. '''
     relations = []
-    
+    sub_rels = []    # labels that appear in the substitutions table
     with open(file, 'r', encoding='utf-8') as f:
         next(f)
         for line in f:
@@ -53,50 +53,48 @@ def read_file(file):
                 l = line.strip().split('\t')
                 if not l[11].lower() in subs:
                     relations.append(l[11].lower())
+                else:
+                    sub_rels.append(l[11].lower())
             except IndexError:
                 pass
-        return relations
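+        # return both the regular labels and the ones that need substitution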
+        return relations, sub_rels
 
 
 rel_files = [f for f in files if any (x in f for x in ['train', 'test', 'dev'])]
 
-rels = []
+good_rels = []    # labels used as-is
+sub_rels = []     # labels that must be mapped through the substitutions table
 for f in rel_files:
-    temp = read_file(f)
-    if temp != []:
-        rels += temp
+    x, y = read_file(f)
+    good_rels += x
+    sub_rels += y
 
-dict_labels = dict(enumerate(list(set(rels))))
-inv_labels = {v:k for k, v in dict_labels.items()}
+dict_labels = dict(enumerate(list(set(good_rels))))
+corpora_labels = {v:k for k, v in dict_labels.items()}
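+# (corpora_labels maps each label string to its integer id)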
 
-leftovers = []
-for sub in subs:
-    if sub not in inv_labels:
-        try:
-            inv_labels[sub] = inv_labels[subs[sub]]
-        except KeyError:
-            leftovers.append(sub)
-    else:
-        leftovers.append(sub)
-for mapping in mappings:
-    if mapping not in inv_labels:
-        leftovers.append(mapping)
 
-counter = len(inv_labels) -1
-for i in leftovers:
-    counter += 1
-    inv_labels[i] = counter
-    
 
+# leftovers = []
+
+# for mapping in mappings:
+#     if mapping not in corpora_labels and mapping not in subs:
+#         leftovers.append(mapping)
 
-# # save the new labels
-print('-'*20)
-print(not_language)
-print(len(inv_labels))
+# counter = max(list(corpora_labels.values())) -1
+# for i in leftovers:
+#     counter += 1
+#     corpora_labels[i] = counter
 
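+# give each substituted label the id of its replacement label
+# (registering the replacement first if it has no id yet)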
+for sub in sub_rels:
+    try:
+        corpora_labels[sub] = corpora_labels[subs[sub]]
+    except KeyError:
+        corpora_labels[subs[sub]] = max(list(corpora_labels.values())) + 1
+        corpora_labels[sub] = corpora_labels[subs[sub]]
 
+# print(corpora_labels)
 
-with open('mappings/zero-shot/' + not_language + '_zero-shot.tsv', 'w') as f:
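+# NOTE: the output corpus name is hardcoded here; presumably set per run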
+with open('mappings/jaccard/' + 'por.rst' + '.tsv', 'w') as f:
     f.write('LABEL\tMAPPING\n')
-    for k, v in inv_labels.items():
+    for k, v in corpora_labels.items():
         f.write(k + '\t' + str(v) + '\n')
\ No newline at end of file
diff --git a/utils.py b/utils.py
index 3880be60ded41aef92cdfabed33a6aa261e613d4..393bfcb9727bfe1765fd5f60c5d94062daacd6bb 100644
--- a/utils.py
+++ b/utils.py
@@ -18,8 +18,9 @@ def open_mappings(mappings_file):
     mappings = {}
     with open(mappings_file, 'r') as f:
         next(f)
-        for l in f:
-            mappings[l.split('\t')[0]] = int(l.strip().split('\t')[-1])
+        for line in f:
+            l = line.strip().split('\t')
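+            # each line: LABEL<TAB>MAPPING -> {label: integer id}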
+            mappings[l[0]] = int(l[-1])
             
     inv_mappings = {}
 #     for label, num in mappings.items():
@@ -163,10 +164,13 @@ def open_sentences(path_to_corpora, mappings_dict):
         - dict of sentences for TEST: each test set categorized per corpus
         - ** NEW ** : dict of labels per framework
     '''
-    
+    # NOTE: assumes `args` is available at module level in utils.py
+    langs_to_use = args.langs_to_use.split(';')
     corpora = [folder for folder in os.listdir(path_to_corpora) 
                if not any(i in folder for i in ['.md', 'DS_', 'utils', 'ipynb'])
+               # optional language filter; uncomment to keep only selected corpora:
+               # if any(l in folder for l in langs_to_use)
                ]
+
                
     # ---------------------
     train_sentences     = []
@@ -186,8 +190,9 @@ def open_sentences(path_to_corpora, mappings_dict):
             train_file = ['/'.join([path_to_corpora, corpus, x])
                               for x in os.listdir(path_to_corpora + '/' + corpus) 
                               if 'train' in x and 'rels' in x
-                             # attention! we block training for some languages if we want HERE
-                              if not args.langs_to_use in x][0]
+                             # NOTE: uncomment the filter below to restrict training to selected corpora
+                             # if any(l in x for l in langs_to_use)
+                             ][0]
             temp = open_file(train_file, mappings_dict)
             # train_sentences += open_file_with_lang(train_file, mappings_dict)
             train_sentences += temp
@@ -216,9 +221,9 @@ def open_sentences(path_to_corpora, mappings_dict):
         all_labels[framework] += [l[-1] for l in temp]  
 #         test_dict_sentences[corpus] += open_file_with_lang(test_file, mappings_dict)
 
-    labels = {framework:set(all_labels[framework]) for framework in all_labels}
+    corpus_labels = {framework:set(all_labels[framework]) for framework in all_labels}
 
-    return train_sentences, dev_dict_sentences, test_dict_sentences, labels
+    return train_sentences, dev_dict_sentences, test_dict_sentences, corpus_labels
 
 
 def open_sentences_with_lang(path_to_corpora, mappings_dict):
@@ -237,7 +242,8 @@ def open_sentences_with_lang(path_to_corpora, mappings_dict):
     train_sentences     = []
     dev_dict_sentences  = {}
     test_dict_sentences = {}
-
+    corpus_labels = []    # placeholder so the return signature matches open_sentences
+
     for corpus in corpora:
         
         try:
@@ -267,7 +273,7 @@ def open_sentences_with_lang(path_to_corpora, mappings_dict):
 #         test_dict_sentences[corpus] += open_file_with_lang(test_file, mappings_dict)
 
     
-    return train_sentences, dev_dict_sentences, test_dict_sentences
+    return train_sentences, dev_dict_sentences, test_dict_sentences, corpus_labels
 
 
 
@@ -333,7 +339,7 @@ def get_predictions_huggingface(trainer,
     '''
 
     results = trainer.predict(test_set)
-    preds = np.softmax(results.predictions, axis=1)
+#     preds = np.softmax(results.predictions, axis=1)
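+    # NOTE: numpy has no softmax; scipy.special.softmax(results.predictions, axis=1)
+    # would be needed if probabilities were required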
     top_preds = np.argmax(results.predictions, axis=1)
     results = results.label_ids
     test_acc = round(accuracy_score(top_preds, results), 4)
@@ -341,7 +347,7 @@ def get_predictions_huggingface(trainer,
     if print_results:
         print(corpus, '\t', test_acc, '\n')
     
-    return preds
+#     return preds
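+    # the function now only reports accuracy; top_preds could be returned if callers need predictions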
 
 
 def get_better_predictions(model,