Skip to content
Snippets Groups Projects
Commit 3eb3386d authored by emetheni's avatar emetheni
Browse files

copy discret files

parent 2b72f24c
Branches
No related tags found
No related merge requests found
Showing with 2984 additions and 0 deletions
......@@ -144,5 +144,6 @@ logs/
*.log
*.out
wandb/
results/
training_output/
# DiscReT: Discourse Relation tagging
The MELODI team submission for Task 3.
## Contents
* **mappings**: a folder with the label conversions we implemented, and specifications on which test results are created from which of our models.
* **pytorch_classifier.py**: the bare classifier using mBERT-base-cased and built on Pytorch
* **make_adapter.py**: code to create a classifier adapter, based on [AdapterHub](https://github.com/adapter-hub/adapter-transformers)
* **adapter_classifier.py**: classifier using one of the trained adapters (training the adapter beforehand is required)
* **requirements.txt**: list of dependencies
* **train_classifiers.sh**: shell script to train all classifiers
* **configure.py**: list of training arguments
* **utils.py**: various functions
## Installation
* Pull data from the [DISRPT Shared Task repository](https://github.com/disrpt/sharedtask2023):
```
git clone https://github.com/disrpt/sharedtask2023
```
* Install requirements, either:
```
pip install -r requirements.txt
```
or by making a conda environment:
```
conda env create -f environment.yml
conda activate discret
```
## Running classifiers
The results are created by three different models:
* the **bare classifier**: an mBERT-base-cased model (max. 6 epochs)
* the **classifier with A1 adapter**: an mBERT-base-cased model trained for 3 epochs with an adapter trained with mBERT-base-cased, for 15 epochs, with frozen layer 1
* the **classifier with A1-3 adapter**: an mBERT-base-cased model trained for 4 epochs with an adapter trained with mBERT-base-cased, for 15 epochs, with frozen layers 1-3
Run either the **train_classifiers.sh** script or each script individually (adapters must be trained beforehand):
### Bare classifier
```
python pytorch_classifier.py \
--num_epochs 6 \
--data_path [PATH_TO_DATA]
```
### Adapter training
A 1:
```
python make_adapter.py \
--num_epochs 15 \
--freeze_layers 'layer.1' \
--data_path [PATH_TO_DATA]
```
A 1-3:
```
python make_adapter.py \
--num_epochs 15 \
--freeze_layers 'layer.1;layer.2;layer.3' \
--data_path [PATH_TO_DATA]
```
### Classifiers with adapter
with A 1:
```
python adapter_classifier.py \
--num_epochs 3 \
--data_path [PATH_TO_DATA] \
--adapter_name 'adapter_15-epochs_frozen-1'
```
with A 1-3:
```
python adapter_classifier.py \
--num_epochs 4 \
--data_path [PATH_TO_DATA] \
--adapter_name 'adapter_15-epochs_frozen-1-2-3'
```
#!/usr/bin/env python
# coding: utf-8
import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer, get_linear_schedule_with_warmup, AutoAdapterModel, AutoModelWithHeads, AutoConfig, TrainingArguments, Trainer, EvalPrediction, set_seed
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import tqdm
import os
from time import sleep
from datetime import datetime
import sys
from sklearn.metrics import classification_report, accuracy_score
from utils import open_file
import pandas as pd
import datasets
from configure import parse_args
from utils import *
# Parse command-line configuration (see configure.py for the argument list).
args = parse_args()

now = datetime.now()
# Timestamp string, available for naming/logging this run.
dt_string = now.strftime("%d.%m.%y-%H:%M:%S")

# Directory name of the pre-trained adapter to load.
adapter_name = args.adapter_name

# Label <-> id mappings; inv_mappings maps ids back to label strings.
mappings, inv_mappings = open_mappings(args.mappings_file)
substitutions_file = 'mappings/substitutions.txt'

tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)

# we are saving the test results of specific epochs
# specific_results = open_specific_results('mappings/specific_results.txt')
# if '1-2-3' in adapter_name or 'layer1;layer2;layer3' in adapter_name:
#     specific_results = list(specific_results['A1_3'][args.num_epochs])
# else:
#     specific_results = list(specific_results['A1'][args.num_epochs])

# Fixed seed for reproducibility across runs.
set_seed(42)

print('Train classifier with adapter\n')
print('Adapter name:', adapter_name)
print('Model:', args.transformer_model)
# Effective batch size = per-device batch size * gradient accumulation steps.
print('Batch size:', args.batch_size * args.gradient_accumulation_steps)
print('Num epochs:', args.num_epochs)

# Open mappings
# NOTE(review): open_mappings was already called above — this repeat is
# redundant but harmless.
mappings, inv_mappings = open_mappings(args.mappings_file)

# Open sentences
train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings)
# ------------------------------------------------------------------
# Build pandas DataFrames from the raw sentence tuples.
# Each sentence tuple ends with (..., token_list, label_id): tokens are
# joined into one text string per example.
# ------------------------------------------------------------------
file_header = ['text', 'labels']

train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences],
                        columns=file_header)
train_df = train_df.sample(frac = 1)  # shuffle the train

dev_dict_df = {corpus: pd.DataFrame([[' '.join(x[-2]), x[-1]]
                                     for x in sents],
                                    columns=file_header)
               for corpus, sents in dev_dict_sentences.items()}

test_dict_df = {corpus: pd.DataFrame([[' '.join(x[-2]), x[-1]]
                                      for x in sents],
                                     columns=file_header)
                for corpus, sents in test_dict_sentences.items()}

# Make datasets from dataframes
train_dataset = datasets.Dataset.from_pandas(train_df)
dev_dict_dataset = {corpus: datasets.Dataset.from_pandas(df)
                    for corpus, df in dev_dict_df.items()}
# Fixed: loop variable renamed (was misleadingly called dev_df here).
test_dict_dataset = {corpus: datasets.Dataset.from_pandas(df)
                     for corpus, df in test_dict_df.items()}

# Number of distinct labels (+1, preserving the original padding convention).
# Fixed: count the column values directly instead of round-tripping them
# through pandas' to_string() rendering, which is whitespace-padded and
# fragile against elision.
num_labels = len({int(x) for x in train_df['labels']}) + 1

# Encode the data for the model.
train_dataset = train_dataset.map(encode_batch, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

encoded_dev_dataset = {}
for corpus in dev_dict_dataset:
    temp = dev_dict_dataset[corpus].map(encode_batch, batched=True)
    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    encoded_dev_dataset[corpus] = temp

encoded_test_dataset = {}
for corpus in test_dict_dataset:
    temp = test_dict_dataset[corpus].map(encode_batch, batched=True)
    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    encoded_test_dataset[corpus] = temp
# ===============================
# Training params
# ===============================
# Base transformer with adapter support; the previously trained adapter is
# loaded from the directory named `adapter_name` and activated.
model = AutoAdapterModel.from_pretrained(args.transformer_model)
active_adapter = model.load_adapter(adapter_name,
                                    config = adapter_name + "/adapter_config.json")
model.set_active_adapters(active_adapter)

training_args = TrainingArguments(
    learning_rate = 2e-5, #1e-4,
    num_train_epochs = args.num_epochs,
    per_device_train_batch_size = args.batch_size,
    per_device_eval_batch_size = args.batch_size,
    gradient_accumulation_steps = args.gradient_accumulation_steps,
    # Steps per effective epoch, i.e. log roughly once per epoch.
    logging_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)),
    output_dir = "./training_output",
    overwrite_output_dir =True,
    remove_unused_columns=False,
)

trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_dataset
)

# Freeze layers in the classifier if desired
if args.freeze_layers != '':
    layers_to_freeze = args.freeze_layers.split(';')
    for name, param in model.named_parameters():
        # Freeze any parameter whose name contains one of the requested
        # layer substrings (e.g. 'layer.1').
        if any(x in name for x in layers_to_freeze):
            param.requires_grad = False

# ===============================
# Start the training 🚀
# ===============================
print('Start training...')
trainer.train()
# Dev results: predict on each dev corpus and write per-corpus result files
# under results/dev/<adapter>_<epochs>/.
print('\nDev results:')
for corpus in encoded_dev_dataset:
    print()
    dev_results = get_predictions_huggingface(trainer, corpus,
                                              encoded_dev_dataset[corpus])
    path_results = 'results/dev/' + adapter_name + '_' + str(args.num_epochs)
    if not os.path.exists(path_results):
        os.makedirs(path_results)
    print_results_to_file(corpus,
                          dev_dict_sentences[corpus],
                          dev_results,
                          inv_mappings,
                          substitutions_file,
                          path_results)

# Test results: same procedure, written under results/test/.
print('\ntest results:')
for corpus in encoded_test_dataset:
    print()
    test_results = get_predictions_huggingface(trainer,
                                               corpus,
                                               encoded_test_dataset[corpus])
    path_results = 'results/test/' + adapter_name + '_' + str(args.num_epochs)
    if not os.path.exists(path_results):
        os.makedirs(path_results)
    print_results_to_file(corpus,
                          test_dict_sentences[corpus],
                          test_results,
                          inv_mappings,
                          substitutions_file,
                          path_results)

# Legacy plain-PyTorch prediction loop, kept for reference:
# for corpus in test_dict_dataloader:
#     test_results = get_predictions(model,
#                                    corpus,
#                                    test_dict_dataloader[corpus])
#     path_results = 'results/test/pytorch' + str(epoch_num+1)
#     if not os.path.exists(path_results):
#         os.makedirs(path_results)
#     print_results_to_file(corpus,
#                           test_dict_sentences[corpus],
#                           test_results,
#                           inv_mappings, substitutions_file,
#                           path_results)

# Save specific test results
# print('\nTest results:')
# for corpus in encoded_test_dataset:
#     print()
#     test_results = get_predictions_huggingface(trainer, corpus,
#                                                encoded_test_dataset[corpus])
#
#     print_results_to_file(corpus, test_dict_sentences[corpus], test_results,
#                           inv_mappings, substitutions_file)
\ No newline at end of file
import argparse
import sys
def parse_args(argv=None):
    """Parse command-line arguments for training/classification scripts.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]`` exactly as before; passing a
            list makes the parser testable and reusable from other code.

    Returns:
        argparse.Namespace with all training configuration values.
    """
    parser = argparse.ArgumentParser()
    # Data and label mappings
    parser.add_argument("--data_path", default="../sharedtask2023/data", type=str,
                        help="The path to the shared task data file from Github.")
    parser.add_argument("--mappings_file", default="mappings/mappings_substitutions.tsv", type=str,
                        help="The mappings file for all relations.")
    # transformer model (help text fixed to match the actual default name)
    parser.add_argument("--transformer_model", default="bert-base-multilingual-cased", type=str,
                        help="Model used, default: bert-base-multilingual-cased")
    # Number of training epochs
    parser.add_argument("--num_epochs", default=4, type=int,
                        help="Number of training epochs. Default: 4")
    # Number of gradient accumulation steps
    parser.add_argument("--gradient_accumulation_steps", default=16, type=int,
                        help="Number of gradient accumulation steps. Default: 16")
    # Dropout
    parser.add_argument("--dropout", default=0.1, type=float,
                        help="Dropout.")
    # Batch size
    parser.add_argument("--batch_size", default=8, type=int,
                        help="With CUDA: max. 8, without: max. 16. Default: 8")
    # Use CUDA
    parser.add_argument("--use_cuda", default='yes', type=str,
                        help="Use CUDA [yes/no]. Careful of batch size!")
    # Layers to freeze, ';'-separated substrings matched against parameter names
    parser.add_argument("--freeze_layers", default='', type=str,
                        help="List of layer(s) to freeze, a str separated by ;. Example: 'layer.1;layer.2'")
    # Pre-trained adapter directory to load (empty = no adapter)
    parser.add_argument("--adapter_name", default='', type=str,
                        help="If you want to use an adapter")
    # normalize direction
    parser.add_argument("--normalize_direction", default='yes', type=str,
                        help="Change order of sentences when the direction of relations is 1<2 to 2>1.")
    args = parser.parse_args(argv)
    return args
\ No newline at end of file
name: discret
channels:
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- ca-certificates=2023.01.10=h06a4308_0
- ld_impl_linux-64=2.38=h1181459_1
- libffi=3.4.4=h6a678d5_0
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libstdcxx-ng=11.2.0=h1234567_1
- ncurses=6.4=h6a678d5_0
- openssl=1.1.1t=h7f8727e_0
- pip=23.0.1=py38h06a4308_0
- python=3.8.16=h7a1cb2a_3
- readline=8.2=h5eee18b_0
- setuptools=66.0.0=py38h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- tk=8.6.12=h1ccaba5_0
- wheel=0.38.4=py38h06a4308_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- adapter-transformers==3.0.1
- aiohttp==3.8.4
- aiosignal==1.3.1
- async-timeout==4.0.2
- attrs==23.1.0
- certifi==2023.5.7
- charset-normalizer==3.1.0
- click==8.1.3
- cmake==3.26.3
- datasets==2.4.0
- dill==0.3.5.1
- filelock==3.12.0
- frozenlist==1.3.3
- fsspec==2023.5.0
- huggingface-hub==0.14.1
- idna==3.4
- jinja2==3.1.2
- joblib==1.2.0
- lit==16.0.3
- markupsafe==2.1.2
- mpmath==1.3.0
- multidict==6.0.4
- multiprocess==0.70.13
- networkx==3.1
- numpy==1.24.3
- nvidia-cublas-cu11==11.10.3.66
- nvidia-cuda-cupti-cu11==11.7.101
- nvidia-cuda-nvrtc-cu11==11.7.99
- nvidia-cuda-runtime-cu11==11.7.99
- nvidia-cudnn-cu11==8.5.0.96
- nvidia-cufft-cu11==10.9.0.58
- nvidia-curand-cu11==10.2.10.91
- nvidia-cusolver-cu11==11.4.0.1
- nvidia-cusparse-cu11==11.7.4.91
- nvidia-nccl-cu11==2.14.3
- nvidia-nvtx-cu11==11.7.91
- packaging==23.1
- pandas==2.0.1
- pillow==9.5.0
- pyarrow==12.0.0
- python-dateutil==2.8.2
- pytz==2023.3
- pyyaml==6.0
- regex==2023.5.5
- requests==2.30.0
- responses==0.18.0
- sacremoses==0.0.53
- scikit-learn==1.2.2
- scipy==1.10.1
- six==1.16.0
- sympy==1.12
- threadpoolctl==3.1.0
- tokenizers==0.12.1
- torch==2.0.1
- torchaudio==2.0.2
- torchvision==0.15.2
- tqdm==4.65.0
- transformers==4.18.0
- triton==2.0.0
- typing-extensions==4.5.0
- tzdata==2023.3
- urllib3==2.0.2
- xxhash==3.2.0
- yarl==1.9.2
\ No newline at end of file
#!/usr/bin/env python
# coding: utf-8
import torch
import numpy as np
from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, AutoConfig, TrainingArguments, Trainer, EvalPrediction, set_seed
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import tqdm
import os
from time import sleep
from datetime import datetime
import sys
from sklearn.metrics import classification_report, accuracy_score
from utils import open_file
import pandas as pd
import datasets
from configure import parse_args
from utils import *
# Parse command-line configuration (see configure.py).
args = parse_args()

now = datetime.now()
# Timestamp string, available for naming/logging this run.
dt_string = now.strftime("%d.%m.%y-%H:%M:%S")

# Suffix of the mappings filename, used to name the results directories.
save_name = args.mappings_file.split('-')[-1]

# Label <-> id mappings; inv_mappings maps ids back to label strings.
mappings, inv_mappings = open_mappings(args.mappings_file)
substitutions_file = 'mappings/substitutions.txt'

tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)

# Fixed seed for reproducibility across runs.
set_seed(42)

print('Model:', args.transformer_model)
# Effective batch size = per-device batch size * gradient accumulation steps.
print('Batch size:', args.batch_size * args.gradient_accumulation_steps)
print('Num epochs:', args.num_epochs)

# Open mappings
# NOTE(review): open_mappings was already called above — this repeat is
# redundant but harmless.
mappings, inv_mappings = open_mappings(args.mappings_file)

# Open sentences
train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings)
# ------------------------------------------------------------------
# Build pandas DataFrames from the raw sentence tuples.
# Each sentence tuple ends with (..., token_list, label_id): tokens are
# joined into one text string per example.
# ------------------------------------------------------------------
file_header = ['text', 'labels']

train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences],
                        columns=file_header)
train_df = train_df.sample(frac = 1)  # shuffle the train

dev_dict_df = {corpus: pd.DataFrame([[' '.join(x[-2]), x[-1]]
                                     for x in sents],
                                    columns=file_header)
               for corpus, sents in dev_dict_sentences.items()}

test_dict_df = {corpus: pd.DataFrame([[' '.join(x[-2]), x[-1]]
                                      for x in sents],
                                     columns=file_header)
                for corpus, sents in test_dict_sentences.items()}

# Make datasets from dataframes
train_dataset = datasets.Dataset.from_pandas(train_df)
dev_dict_dataset = {corpus: datasets.Dataset.from_pandas(df)
                    for corpus, df in dev_dict_df.items()}
# Fixed: loop variable renamed (was misleadingly called dev_df here).
test_dict_dataset = {corpus: datasets.Dataset.from_pandas(df)
                     for corpus, df in test_dict_df.items()}

# Number of distinct labels (+1, preserving the original padding convention).
# Fixed: count the column values directly instead of round-tripping them
# through pandas' to_string() rendering, which is whitespace-padded and
# fragile against elision.
num_labels = len({int(x) for x in train_df['labels']}) + 1

# Encode the data for the model.
train_dataset = train_dataset.map(encode_batch, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

encoded_dev_dataset = {}
for corpus in dev_dict_dataset:
    temp = dev_dict_dataset[corpus].map(encode_batch, batched=True)
    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    encoded_dev_dataset[corpus] = temp

encoded_test_dataset = {}
for corpus in test_dict_dataset:
    temp = test_dict_dataset[corpus].map(encode_batch, batched=True)
    temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    encoded_test_dataset[corpus] = temp
# ===============================
# Training params
# ===============================
# Bug fix: pass num_labels when building the classifier. The previous call
# omitted it, so transformers constructed its default 2-label classification
# head even though the label set computed above is much larger.
model = AutoModelForSequenceClassification.from_pretrained(
    args.transformer_model,
    num_labels=num_labels,
)

training_args = TrainingArguments(
    learning_rate = 2e-5, #1e-4,
    num_train_epochs = args.num_epochs,
    per_device_train_batch_size = args.batch_size,
    per_device_eval_batch_size = args.batch_size,
    gradient_accumulation_steps = args.gradient_accumulation_steps,
    # Steps per effective epoch, i.e. log roughly once per epoch.
    logging_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)),
    output_dir = "./training_output",
    overwrite_output_dir =True,
    remove_unused_columns=False,
)

trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_dataset
)

# Freeze layers in the classifier if desired
if args.freeze_layers != '':
    layers_to_freeze = args.freeze_layers.split(';')
    for name, param in model.named_parameters():
        # Freeze any parameter whose name contains one of the requested
        # layer substrings (e.g. 'layer.1').
        if any(x in name for x in layers_to_freeze):
            param.requires_grad = False

# ===============================
# Start the training 🚀
# ===============================
print('Start training...')
trainer.train()
# Dev results: predict on each dev corpus and write per-corpus result files
# under results/dev/<save_name>_<epochs>/.
print('\nDev results:')
for corpus in encoded_dev_dataset:
    print()
    dev_results = get_predictions_huggingface(trainer, corpus,
                                              encoded_dev_dataset[corpus])
    path_results = 'results/dev/' + save_name + '_' + str(args.num_epochs)
    if not os.path.exists(path_results):
        os.makedirs(path_results)
    # NOTE(review): substitutions_file is commented out here but IS passed in
    # the test loop below, so the positional arguments differ between the two
    # calls — confirm print_results_to_file's signature accepts both forms.
    print_results_to_file(corpus,
                          dev_dict_sentences[corpus],
                          dev_results,
                          inv_mappings,
                          #substitutions_file,
                          path_results)

# Test results: same procedure, written under results/test/.
print('\ntest results:')
for corpus in encoded_test_dataset:
    print()
    test_results = get_predictions_huggingface(trainer,
                                               corpus,
                                               encoded_test_dataset[corpus])
    path_results = 'results/test/' + save_name + '_' + str(args.num_epochs)
    if not os.path.exists(path_results):
        os.makedirs(path_results)
    print_results_to_file(corpus,
                          test_dict_sentences[corpus],
                          test_results,
                          inv_mappings,
                          substitutions_file,
                          path_results)

# Legacy plain-PyTorch prediction loop, kept for reference:
# for corpus in test_dict_dataloader:
#     test_results = get_predictions(model,
#                                    corpus,
#                                    test_dict_dataloader[corpus])
#     path_results = 'results/test/pytorch' + str(epoch_num+1)
#     if not os.path.exists(path_results):
#         os.makedirs(path_results)
#     print_results_to_file(corpus,
#                           test_dict_sentences[corpus],
#                           test_results,
#                           inv_mappings, substitutions_file,
#                           path_results)

# Save specific test results
# print('\nTest results:')
# for corpus in encoded_test_dataset:
#     print()
#     test_results = get_predictions_huggingface(trainer, corpus,
#                                                encoded_test_dataset[corpus])
#
#     print_results_to_file(corpus, test_dict_sentences[corpus], test_results,
#                           inv_mappings, substitutions_file)
\ No newline at end of file
#!/usr/bin/env python
# coding: utf-8
import os
import numpy as np
from datetime import datetime
import pandas as pd
import torch
from transformers import AutoModel, AutoTokenizer, AutoModelWithHeads, AutoConfig, TrainingArguments, AdapterTrainer, EvalPrediction, set_seed
import datasets
from configure import parse_args
from sklearn.metrics import accuracy_score
from utils import *
# parameters
args = parse_args()
tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)
# ';'-separated substrings matched against parameter names when freezing.
layers_to_freeze = args.freeze_layers.split(';')
# Fixed seed for reproducibility across runs.
set_seed(42)
batch_size = args.batch_size
# Class-set identifier taken from the mappings filename (extension stripped).
mapping_classes = args.mappings_file[:-4].split('-')[-1]

# Set name for adapter: encodes epochs, frozen layers and the mapping set.
adapter_name = 'A_' + str(args.num_epochs) + '-F_' + args.freeze_layers.replace('layer.', '-').replace(';', '') + '-M_' + mapping_classes

print('Create classifier adapter\n')
print('Name:', adapter_name)
print('Model:', args.transformer_model)
# Effective batch size = per-device batch size * gradient accumulation steps.
print('Batch size:', args.batch_size * args.gradient_accumulation_steps)
print('Frozen layers:', args.freeze_layers.replace(';', ', '))

# Open mappings (inv_mappings maps label ids back to label strings).
mappings, inv_mappings = open_mappings(args.mappings_file)

# Open sentences; the test split is unused when training the adapter.
train_sentences, dev_dict_sentences, _ = open_sentences(args.data_path, mappings)

# make pandas dataframes: one text string + one label id per example
file_header = ['text', 'labels']
train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences], columns=file_header)
train_df = train_df.sample(frac = 1) # shuffle the train

# get a global dev accuracy, we will not be directly using these results
dev_df = pd.DataFrame([[' '.join(x[-2]), x[-1]]
                       for sents in dev_dict_sentences.values()
                       for x in sents ], columns=file_header)

# Make datasets from dataframes
train_dataset = datasets.Dataset.from_pandas(train_df)
dev_dataset = datasets.Dataset.from_pandas(dev_df)

# get number of labels
# NOTE(review): counts distinct labels by parsing pandas' to_string() output;
# this is fragile (whitespace padding, potential elision) — verify before
# reusing this pattern.
num_labels = len(set([int(x.strip())
                      for x in train_df['labels'].to_string(index=False).split('\n')])) +1

# Encode the data for the model.
train_dataset = train_dataset.map(encode_batch, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
dev_dataset = dev_dataset.map(encode_batch, batched=True)
dev_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
# Training
config = AutoConfig.from_pretrained(
    args.transformer_model,
    num_labels=num_labels,
)
model = AutoModelWithHeads.from_pretrained(
    args.transformer_model,
    config=config,
)

# Add a new adapter
model.add_adapter(adapter_name)
# Add a matching classification head (id2label lets the head report
# human-readable label names).
model.add_classification_head(
    adapter_name,
    num_labels=num_labels,
    id2label=inv_mappings
)

# Activate the adapter: only the adapter (and head) weights are trained.
print('Initialize adapter...')
model.train_adapter(adapter_name)

training_args = TrainingArguments(
    learning_rate = 1e-4,
    num_train_epochs = args.num_epochs,
    per_device_train_batch_size = args.batch_size,
    per_device_eval_batch_size = args.batch_size,
    gradient_accumulation_steps = args.gradient_accumulation_steps,
    # Steps per effective epoch, i.e. log roughly once per epoch.
    logging_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)),
    output_dir = "./training_output",
    overwrite_output_dir =True,
    remove_unused_columns=False,
)

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)
# freeze layers: any parameter whose name contains one of the requested
# substrings is excluded from training.
if args.freeze_layers != '':
    for name, param in model.named_parameters():
        if any(x in name for x in layers_to_freeze):
            param.requires_grad = False

# Start the training 🚀
print('\nStart training...\n')
trainer.train()

# Save adapter to load for the finetuned model (written to ./<adapter_name>/).
model.save_adapter(adapter_name, adapter_name)

# Perform evaluation on the pooled dev set.
results = trainer.predict(dev_dataset)
preds = np.argmax(results.predictions, axis=1)
results = results.label_ids
# accuracy_score is symmetric in its two arguments, so the (preds, labels)
# order here is harmless.
print('Dev accuracy:', round(accuracy_score(preds, results), 4))
\ No newline at end of file
LABEL CLASS MAPPING
adversative contrast 1
adversative-antithesis contrast 1
adversative-concession contrast 1
adversative-contrast contrast 1
alternative condition 10
antithesis contrast 1
attribution attribution 2
attribution-negative attribution 2
attribution-positive attribution 2
background background 8
causal cause 6
causal-cause cause 6
causal-result cause 6
cause cause 6
cause-effect cause 6
circumstance background 8
comparison comparison 12
concession contrast 1
conclusion evaluation 13
condition condition 10
conjunction joint 16
context explanation 15
context-background background 8
context-circumstance background 8
contingency condition 10
contingency-condition condition 10
contrast contrast 1
disjunction same-unit 11
e-elaboration elaboration 3
effect cause 6
elaboration elaboration 3
elaboration-additional elaboration 3
elaboration-attribute elaboration 3
enablement enablement 14
evaluation evaluation 13
evaluation-comment evaluation 13
evaluation-n evaluation 13
evaluation-s evaluation 13
evidence explanation 15
explanation explanation 15
explanation-evidence explanation 15
explanation-justify explanation 15
explanation-motivation explanation 15
interpretation evaluation 13
interpretation-evaluation evaluation 13
joint joint 16
joint-disjunction joint 16
joint-list joint 16
joint-other joint 16
joint-sequence temporal 17
justify explanation 15
list joint 16
manner-means manner-means 4
means manner-means 4
mode manner-means 4
mode-manner manner-means 4
mode-means manner-means 4
motivation explanation 15
nonvolitional-cause cause 6
nonvolitional-cause-e cause 6
nonvolitional-result cause 6
nonvolitional-result-e cause 6
organization textual-organization 0
organization-heading textual-organization 0
organization-phatic textual-organization 0
organization-preparation textual-organization 0
otherwise condition 10
parenthetical same-unit 11
preparation background 8
purpose enablement 14
purpose-attribute enablement 14
purpose-goal enablement 14
reason explanation 15
restatement summary 5
restatement-mn summary 5
restatement-partial summary 5
restatement-repetition summary 5
result cause 6
sequence temporal 17
solutionhood topic-comment 7
summary summary 5
temporal temporal 17
textual-organization textual-organization 0
topic topic-comment 7
topic-change topic-change 9
topic-comment topic-comment 7
topic-drift topic-change 9
topic-question topic-comment 7
topic-solutionhood topic-comment 7
unconditional condition 10
unless condition 10
volitional-cause cause 6
volitional-result cause 6
causation cause 6
comparison.concession contrast 1
comparison.concession+speechact comparison 12
comparison.contrast contrast 1
comparison.degree comparison 12
comparison.similarity comparison 12
conditional condition 10
contingency.cause condition 10
contingency.cause+belief condition 10
contingency.cause+speechact condition 10
contingency.condition condition 10
contingency.condition+speechact condition 10
contingency.goal condition 10
contingency.negative-cause cause 6
contingency.negative-condition condition 10
contingency.purpose enablement 14
expansion elaboration 3
expansion.alternative condition 10
expansion.conjunction joint 16
expansion.correction contrast 1
expansion.disjunction cause 6
expansion.equivalence comparison 12
expansion.exception contrast 1
expansion.instantiation elaboration 3
expansion.level-of-detail elaboration 3
expansion.manner manner-means 4
expansion.restatement summary 5
expansion.substitution contrast 1
hypophora topic-comment 7
interrupted topic-change 9
progression temporal 17
repetition elaboration 3
temporal.asynchronous temporal 17
temporal.synchronous temporal 17
temporal.synchrony temporal 17
qap topic-comment 7
contingency.negative-condition+speechact condition 10
contingency.negative condition 10
expansion.genexpansion elaboration 3
expansion.level elaboration 3
qap.hypophora topic-comment 7
bg-compare background 8
bg-general background 8
bg-goal background 8
cause-result cause 6
elab-addition elaboration 3
elab-aspect elaboration 3
elab-definition elaboration 3
elab-enumember elaboration 3
elab-example elaboration 3
elab-process_step elaboration 3
exp-evidence explanation 15
exp-reason explanation 15
findings cause 6
acknowledgement attribution 2
alternation condition 10
clarification_question topic-comment 7
comment evaluation 13
continuation joint 16
correction contrast 1
explanation* explanation 15
flashback explanation 15
frame explanation 15
goal enablement 14
narration elaboration 3
parallel joint 16
q_elab elaboration 3
question_answer_pair topic-comment 7
temploc temporal 17
LABEL CLASS MAPPING
adversative contrast 1
adversative-antithesis contrast 1
adversative-concession contrast 1
adversative-contrast contrast 1
alternative condition 10
antithesis contrast 1
attribution attribution 2
attribution-negative attribution 2
attribution-positive attribution 2
background background 8
causal cause 6
causal-cause cause 6
causal-result cause 6
cause cause 6
cause-effect cause 6
circumstance background 8
comparison comparison 11
concession contrast 1
conclusion evaluation 12
condition condition 10
conjunction joint 15
context background 8
context-background background 8
context-circumstance background 8
contingency condition 10
contingency-condition condition 10
contrast contrast 1
disjunction joint 15
e-elaboration elaboration 3
effect cause 6
elaboration elaboration 3
elaboration-additional elaboration 3
elaboration-attribute elaboration 3
enablement enablement 13
evaluation evaluation 12
evaluation-comment evaluation 12
evaluation-n evaluation 12
evaluation-s evaluation 12
evidence explanation 14
explanation explanation 14
explanation-evidence explanation 14
explanation-justify explanation 14
explanation-motivation explanation 14
interpretation evaluation 12
interpretation-evaluation evaluation 12
joint joint 15
joint-disjunction joint 15
joint-list joint 15
joint-other joint 15
joint-sequence temporal 16
justify explanation 14
list joint 15
manner-means manner-means 4
means manner-means 4
mode manner-means 4
mode-manner manner-means 4
mode-means manner-means 4
motivation explanation 14
nonvolitional-cause cause 6
nonvolitional-cause-e cause 6
nonvolitional-result cause 6
nonvolitional-result-e cause 6
organization background 8
organization-heading background 8
organization-phatic background 8
organization-preparation background 8
otherwise condition 10
parenthetical elaboration 3
preparation background 8
purpose enablement 13
purpose-attribute enablement 13
purpose-goal enablement 13
reason explanation 14
restatement summary 5
restatement-mn summary 5
restatement-partial summary 5
restatement-repetition summary 5
result cause 6
sequence temporal 16
solutionhood topic-comment 7
summary summary 5
temporal temporal 16
textual-organization background 8
topic topic-comment 7
topic-change topic-change 9
topic-comment topic-comment 7
topic-drift	topic-change	9
topic-question topic-comment 7
topic-solutionhood topic-comment 7
unconditional condition 10
unless condition 10
volitional-cause cause 6
volitional-result cause 6
causation cause 6
comparison.concession contrast 1
comparison.concession+speechact comparison 11
comparison.contrast contrast 1
comparison.degree comparison 11
comparison.similarity comparison 11
conditional condition 10
contingency.cause condition 10
contingency.cause+belief condition 10
contingency.cause+speechact condition 10
contingency.condition condition 10
contingency.condition+speechact condition 10
contingency.goal condition 10
contingency.negative-cause cause 6
contingency.negative-condition condition 10
contingency.purpose enablement 13
expansion elaboration 3
expansion.alternative condition 10
expansion.conjunction joint 15
expansion.correction contrast 1
expansion.disjunction joint 15
expansion.equivalence comparison 11
expansion.exception contrast 1
expansion.instantiation elaboration 3
expansion.level-of-detail elaboration 3
expansion.manner manner-means 4
expansion.restatement summary 5
expansion.substitution contrast 1
hypophora topic-comment 7
interrupted topic-change 9
progression temporal 16
repetition elaboration 3
temporal.asynchronous temporal 16
temporal.synchronous temporal 16
temporal.synchrony temporal 16
qap topic-comment 7
contingency.negative-condition+speechact condition 10
contingency.negative condition 10
expansion.genexpansion elaboration 3
expansion.level elaboration 3
qap.hypophora topic-comment 7
bg-compare background 8
bg-general background 8
bg-goal background 8
cause-result cause 6
elab-addition elaboration 3
elab-aspect elaboration 3
elab-definition elaboration 3
elab-enumember elaboration 3
elab-example elaboration 3
elab-process_step elaboration 3
exp-evidence explanation 14
exp-reason explanation 14
findings cause 6
acknowledgement attribution 2
alternation condition 10
clarification_question topic-comment 7
comment evaluation 12
continuation joint 15
correction contrast 1
explanation* explanation 14
flashback explanation 14
frame explanation 14
goal enablement 13
narration elaboration 3
parallel joint 15
q_elab elaboration 3
question_answer_pair topic-comment 7
temploc temporal 16
mode-means 0
expansion.restatement 1
expansion.substitution 2
bg-compare 3
root 4
organization-preparation 5
topic-solutionhood 6
evaluation-n 7
contingency.negative-cause 8
organization 9
causal 10
elab-enumember 11
organization-phatic 12
purpose-attribute 13
mode 14
temporal 15
contingency.cause+belief 16
means 17
expansion 18
comparison.concession+speechact 19
parallel 20
contingency.condition 21
context-circumstance 22
restatement-partial 23
expansion.equivalence 24
interrupted 25
contingency.negative-condition 26
comment 27
organization-heading 28
joint-other 29
result 30
expansion.alternative 31
parenthetical 32
clarification_question 33
background 34
conjunction 77
nonvolitional-result-e 36
manner-means 37
elaboration-additional 38
attribution 39
volitional-result 40
contingency.negative 41
mode-manner 42
expansion.level-of-detail 43
topic-comment 44
joint-sequence 45
elab-addition 46
explanation* 47
comparison.similarity 48
reason 49
solutionhood 50
nonvolitional-cause 51
contingency.negative-condition+speechact 52
topic-question 53
elab-definition 54
hypophora 55
adversative 56
elaboration-attribute 57
nonvolitional-result 58
joint 59
bg-goal 60
contrast 61
explanation-justify 62
context-background 63
topic-drift 64
contingency.purpose 65
explanation 66
elaboration 67
elab-example 68
evaluation-comment 69
continuation 70
exp-reason 71
interpretation 72
conclusion 73
attribution-negative 74
flashback 75
frame 76
expansion.conjunction 77
preparation 78
temporal.asynchronous 79
attribution-positive 80
acknowledgement 81
comparison.contrast 82
condition 83
contingency.goal 84
restatement-repetition 85
temploc 86
adversative-contrast 87
topic-change 88
context 89
effect 90
expansion.correction 91
contingency.cause 92
progression 93
evaluation-s 94
explanation-evidence 95
volitional-cause 96
concession 97
expansion.exception 98
summary 99
comparison.degree 100
adversative-concession 101
comparison 102
topic 103
expansion.instantiation 104
purpose-goal 105
evaluation 106
expansion.disjunction 107
explanation-motivation 108
nonvolitional-cause-e 109
question_answer_pair 110
restatement-mn 111
contingency.cause+speechact 112
cause-effect 113
purpose 114
enablement 115
cause 116
e-elaboration 117
contingency.condition+speechact 118
interpretation-evaluation 119
adversative-antithesis 120
antithesis 121
expansion.manner 122
comparison.concession 123
narration 124
contingency-condition 125
contingency 126
temporal.synchronous 127
circumstance 128
q_elab 129
causal-cause 130
joint-list 131
elab-aspect 132
elab-process_step 133
causal-result 134
alternation 31
conditional 83
goal 105
correction 91
alternative 31
disjunction 107
evidence 95
justify 62
list 131
motivation 108
restatement 1
sequence 45
unless 61
causation 116
bg-general 34
exp-evidence 95
otherwise 56
unconditional 107
joint-disjunction 107
repetition 85
temporal.synchrony 127
textual-organization 9
cause-result 113
findings 30
qap 110
expansion.level 43
qap.hypophora 55
expansion.genexpansion 18
BEST EPOCH Corpus
B 3 deu.rst.pcc
A1_3 4 eng.dep.covdtb
B 4 eng.dep.scidtb
B 4 eng.pdtb.pdtb
A1_3 4 eng.pdtb.tedm
A1_3 4 eng.rst.gum
B 5 eng.rst.rstdt
A1_3 4 eng.sdrt.stac
A1_3 4 eus.rst.ert
B 3 fas.rst.prstc
A1_3 4 fra.sdrt.annodis
A1_3 4 ita.pdtb.luna
A1_3 4 nld.rst.nldt
A1 3 por.pdtb.crpc
A1_3 4 por.pdtb.tedm
A1_3 4 por.rst.cstn
A1_3 4 rus.rst.rrt
A1_3 4 spa.rst.rststb
B 5 spa.rst.sctb
A1_3 4 tha.pdtb.tdtb
B 3 tur.pdtb.tdb
A1_3 4 tur.pdtb.tedm
A1 3 zho.dep.scidtb
B 4 zho.pdtb.cdtb
A1 3 zho.rst.gcdt
A1_3 4 zho.rst.sctb
\ No newline at end of file
LABEL CLASS
adversative contrast
adversative-antithesis contrast
adversative-concession contrast
adversative-contrast contrast
alternative condition
antithesis contrast
attribution attribution
attribution-negative attribution
attribution-positive attribution
background background
causal cause
causal-cause cause
causal-result cause
cause cause
cause-effect cause
circumstance background
comparison comparison
concession contrast
conclusion evaluation
condition condition
conjunction joint
context explanation
context-background background
context-circumstance background
contingency condition
contingency-condition condition
contrast contrast
disjunction same-unit
e-elaboration elaboration
effect cause
elaboration elaboration
elaboration-additional elaboration
elaboration-attribute elaboration
enablement enablement
evaluation evaluation
evaluation-comment evaluation
evaluation-n evaluation
evaluation-s evaluation
evidence explanation
explanation explanation
explanation-evidence explanation
explanation-justify explanation
explanation-motivation explanation
interpretation evaluation
interpretation-evaluation evaluation
joint joint
joint-disjunction joint
joint-list joint
joint-other joint
joint-sequence temporal
justify explanation
list joint
manner-means manner-means
means manner-means
mode manner-means
mode-manner manner-means
mode-means manner-means
motivation explanation
nonvolitional-cause cause
nonvolitional-cause-e cause
nonvolitional-result cause
nonvolitional-result-e cause
organization textual-organization
organization-heading textual-organization
organization-phatic textual-organization
organization-preparation textual-organization
otherwise condition
parenthetical same-unit
preparation background
purpose enablement
purpose-attribute enablement
purpose-goal enablement
reason explanation
restatement summary
restatement-mn summary
restatement-partial summary
restatement-repetition summary
result cause
sequence temporal
solutionhood topic-comment
summary summary
temporal temporal
textual-organization textual-organization
topic topic-comment
topic-change topic-change
topic-comment topic-comment
topic-drift topic-change
topic-question topic-comment
topic-solutionhood topic-comment
unconditional condition
unless condition
volitional-cause cause
volitional-result cause
causation cause
comparison.concession contrast
comparison.concession+speechact comparison
comparison.contrast contrast
comparison.degree comparison
comparison.similarity comparison
conditional condition
contingency.cause condition
contingency.cause+belief condition
contingency.cause+speechact condition
contingency.condition condition
contingency.condition+speechact condition
contingency.goal condition
contingency.negative-cause cause
contingency.negative-condition condition
contingency.purpose enablement
expansion elaboration
expansion.alternative condition
expansion.conjunction joint
expansion.correction contrast
expansion.disjunction cause
expansion.equivalence comparison
expansion.exception contrast
expansion.instantiation elaboration
expansion.level-of-detail elaboration
expansion.manner manner-means
expansion.restatement summary
expansion.substitution contrast
hypophora topic-comment
interrupted topic-change
progression temporal
repetition elaboration
temporal.asynchronous temporal
temporal.synchronous temporal
temporal.synchrony temporal
qap topic-comment
contingency.negative-condition+speechact condition
contingency.negative condition
expansion.genexpansion elaboration
expansion.level elaboration
qap.hypophora topic-comment
bg-compare background
bg-general background
bg-goal background
cause-result cause
elab-addition elaboration
elab-aspect elaboration
elab-definition elaboration
elab-enumember elaboration
elab-example elaboration
elab-process_step elaboration
exp-evidence explanation
exp-reason explanation
findings cause
acknowledgement attribution
alternation condition
clarification_question topic-comment
comment evaluation
continuation joint
correction contrast
explanation* explanation
flashback explanation
frame explanation
goal enablement
narration elaboration
parallel joint
q_elab elaboration
question_answer_pair topic-comment
temploc temporal
LABEL CLASS
adversative contrast
adversative-antithesis contrast
adversative-concession contrast
adversative-contrast contrast
alternative condition
antithesis contrast
attribution attribution
attribution-negative attribution
attribution-positive attribution
background background
causal cause
causal-cause cause
causal-result cause
cause cause
cause-effect cause
circumstance background
comparison comparison
concession contrast
conclusion evaluation
condition condition
conjunction joint
context background
context-background background
context-circumstance background
contingency condition
contingency-condition condition
contrast contrast
disjunction joint
e-elaboration elaboration
effect cause
elaboration elaboration
elaboration-additional elaboration
elaboration-attribute elaboration
enablement enablement
evaluation evaluation
evaluation-comment evaluation
evaluation-n evaluation
evaluation-s evaluation
evidence explanation
explanation explanation
explanation-evidence explanation
explanation-justify explanation
explanation-motivation explanation
interpretation evaluation
interpretation-evaluation evaluation
joint joint
joint-disjunction joint
joint-list joint
joint-other joint
joint-sequence temporal
justify explanation
list joint
manner-means manner-means
means manner-means
mode manner-means
mode-manner manner-means
mode-means manner-means
motivation explanation
nonvolitional-cause cause
nonvolitional-cause-e cause
nonvolitional-result cause
nonvolitional-result-e cause
organization background
organization-heading background
organization-phatic background
organization-preparation background
otherwise condition
parenthetical elaboration
preparation background
purpose enablement
purpose-attribute enablement
purpose-goal enablement
reason explanation
restatement summary
restatement-mn summary
restatement-partial summary
restatement-repetition summary
result cause
sequence temporal
solutionhood topic-comment
summary summary
temporal temporal
textual-organization background
topic topic-comment
topic-change topic-change
topic-comment topic-comment
topic-drift	topic-change
topic-question topic-comment
topic-solutionhood topic-comment
unconditional condition
unless condition
volitional-cause cause
volitional-result cause
causation cause
comparison.concession contrast
comparison.concession+speechact comparison
comparison.contrast contrast
comparison.degree comparison
comparison.similarity comparison
conditional condition
contingency.cause condition
contingency.cause+belief condition
contingency.cause+speechact condition
contingency.condition condition
contingency.condition+speechact condition
contingency.goal condition
contingency.negative-cause cause
contingency.negative-condition condition
contingency.purpose enablement
expansion elaboration
expansion.alternative condition
expansion.conjunction joint
expansion.correction contrast
expansion.disjunction joint
expansion.equivalence comparison
expansion.exception contrast
expansion.instantiation elaboration
expansion.level-of-detail elaboration
expansion.manner manner-means
expansion.restatement summary
expansion.substitution contrast
hypophora topic-comment
interrupted topic-change
progression temporal
repetition elaboration
temporal.asynchronous temporal
temporal.synchronous temporal
temporal.synchrony temporal
qap topic-comment
contingency.negative-condition+speechact condition
contingency.negative condition
expansion.genexpansion elaboration
expansion.level elaboration
qap.hypophora topic-comment
bg-compare background
bg-general background
bg-goal background
cause-result cause
elab-addition elaboration
elab-aspect elaboration
elab-definition elaboration
elab-enumember elaboration
elab-example elaboration
elab-process_step elaboration
exp-evidence explanation
exp-reason explanation
findings cause
acknowledgement attribution
alternation condition
clarification_question topic-comment
comment evaluation
continuation joint
correction contrast
explanation* explanation
flashback explanation
frame explanation
goal enablement
narration elaboration
parallel joint
q_elab elaboration
question_answer_pair topic-comment
temploc temporal
This diff is collapsed.
#!/usr/bin/env python
# coding: utf-8

# Bare mBERT-based discourse-relation classifier (plain PyTorch training loop).

import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import tqdm
import os
from time import sleep
from datetime import datetime
import sys
from sklearn.metrics import classification_report, accuracy_score
from configure import parse_args
from utils import *

args = parse_args()

# Timestamp string, available for run-specific output naming.
now = datetime.now()
dt_string = now.strftime("%d.%m.%y-%H:%M:%S")

# Transformer layer names to freeze, e.g. 'layer.1;layer.2' (see configure.py).
layers_to_freeze = args.freeze_layers.split(";")
substitutions_file = 'mappings/substitutions.txt'
# e.g. 'mappings/mappings-classes-braud.tsv' -> 'braud'; used in result paths.
mapping_classes = args.mappings_file[:-4].split('-')[-1]
# specific_results = open_specific_results('mappings/specific_results.txt')['B']

set_seed(42)

# ===============
# Dataset class
# ===============
class Dataset(torch.utils.data.Dataset):
    """Wraps pre-processed sentence pairs for the DataLoader.

    Each input entry is a list whose second-to-last element is the token
    sequence (both units joined with a [SEP] token) and whose last element
    is the integer-encoded label. Tokenization happens eagerly, using the
    module-level `tokenizer`.
    """

    def __init__(self, sentences):
        self.labels = [entry[-1] for entry in sentences]
        self.texts = [
            tokenizer(entry[-2],
                      is_split_into_words=True,
                      padding='max_length',
                      max_length=512,
                      truncation=True,
                      return_tensors="pt")
            for entry in sentences
        ]

    def classes(self):
        # All labels, in dataset order.
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels as a numpy array.
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of tokenized inputs.
        return self.texts[idx]

    def __getitem__(self, idx):
        return self.get_batch_texts(idx), self.get_batch_labels(idx)
# ===============
# Load datasets
# ===============

# Open mappings: label -> int id, plus the inverse for decoding predictions.
mappings, inv_mappings = open_mappings(args.mappings_file)
batch_size = args.batch_size
tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)

# Train is one pooled list over all corpora; dev/test are dicts keyed by corpus.
train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings)

# Determine linear size (= number of classes in the sets + 1)
num_labels = len(set(sent[-1] for sent in train_sentences)) + 1

# make train/dev datasets
train_dataset = Dataset(train_sentences)
dev_dataset = {corpus: Dataset(s) for corpus, s in dev_dict_sentences.items()}
test_dataset = {corpus: Dataset(s) for corpus, s in test_dict_sentences.items()}

# Make datasets with batches and dataloader
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
dev_dict_dataloader = {corpus: DataLoader(dev_data, batch_size)
                       for corpus, dev_data in dev_dataset.items()}
test_dict_dataloader = {corpus: DataLoader(test_data, batch_size)
                        for corpus, test_data in test_dataset.items()}
# ===============
# Model setup
# ===============
class TransformerClassifier(nn.Module):
    """Transformer encoder with a dropout + linear classification head.

    Classifies from the first ([CLS]) token of the last hidden state.
    Uses the module-level `args` (model name, dropout) and `num_labels`.
    """

    def __init__(self, dropout=args.dropout):
        super(TransformerClassifier, self).__init__()
        self.tr_model = AutoModel.from_pretrained(args.transformer_model)
        self.dropout = nn.Dropout(dropout)
        # 768 = BERT-base hidden size; output = number of classes.
        self.linear = nn.Linear(768, num_labels)
        # NOTE(review): ReLU on the classification logits zeroes all negative
        # scores before CrossEntropyLoss -- unusual; confirm it is intended.
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        encoded = self.tr_model(input_ids=input_id,
                                attention_mask=mask,
                                return_dict=True)
        cls_state = encoded['last_hidden_state'][:, 0, :]
        return self.relu(self.linear(self.dropout(cls_state)))
# Instantiate the classifier (downloads/loads pretrained weights).
model = TransformerClassifier()
def train(model,
          train_dataloader,
          dev_dict_dataloader,
          test_dict_sentences,
          test_dict_dataloader,
          epochs,
          #specific_results
          ):
    """Fine-tune the classifier; write dev and test predictions every epoch.

    Args:
        model: TransformerClassifier instance, trained in place.
        train_dataloader: DataLoader over the pooled training corpora.
        dev_dict_dataloader: dict corpus -> DataLoader for the dev sets.
        test_dict_sentences: dict corpus -> raw test lines (for output files).
        test_dict_dataloader: dict corpus -> DataLoader for the test sets.
        epochs: number of training epochs.

    Uses AdamW (lr 2e-5) with a linear warmup schedule, CrossEntropyLoss,
    and gradient accumulation (args.gradient_accumulation_steps).
    Prediction files go to results/dev/ and results/test/, one folder per
    epoch. Returns nothing.

    NOTE: the dev loop reads the module-level `dev_dict_sentences`, not a
    parameter -- keep that global populated before calling.
    """
    device = torch.device("cuda" if args.use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), #Adam
                      lr = 2e-5, #1e-6
                      eps = 1e-8
                      )

    if args.use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    gradient_accumulation_steps = args.gradient_accumulation_steps
    # NOTE(review): total_steps counts raw batches, but scheduler.step() below
    # only runs once per accumulation cycle, so the linear schedule ends early
    # by a factor of gradient_accumulation_steps -- TODO confirm intended.
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps = 0,
                                                num_training_steps = total_steps)

    seed_val = 42
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # freeze layers, see argument in configure.py
    if args.freeze_layers != '':
        for name, param in model.named_parameters():
            if any(x in name for x in layers_to_freeze):
                param.requires_grad = False

    for epoch_num in range(0, epochs):
        print('\n=== Epoch {:} / {:} ==='.format(epoch_num + 1, epochs))

        model.train()

        total_acc_train = 0
        total_loss_train = 0
        batch_counter = 0

        # for train_input, train_label in tqdm(train_dataloader):
        for train_input, train_label in train_dataloader:
            batch_counter += 1
            train_label = train_label.to(device)
            mask = train_input['attention_mask'].to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)
            output = model(input_id, mask)

            # batch_loss = criterion(output, train_label.long())
            # total_loss_train += batch_loss.item()

            # acc = (output.argmax(dim=1) == train_label).sum().item()
            # total_acc_train += acc

            # Compute Loss and Perform Back-propagation
            loss = criterion(output, train_label.long())

            # Normalize the Gradients
            loss = loss / gradient_accumulation_steps
            loss.backward()

            if (batch_counter % gradient_accumulation_steps == 0):
                # Update Optimizer
                optimizer.step() # or flip them?
                optimizer.zero_grad()
                model.zero_grad()
                # loss.backward()
                # NOTE(review): clipping here runs *after* optimizer.step() and
                # right after the gradients were zeroed, so it cannot affect the
                # update just applied; it should normally precede optimizer.step().
                # TODO confirm and reorder.
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                # optimizer.step()
                scheduler.step()

        # ------ Validation --------

        print('\nValidation for epoch:', epoch_num + 1)

        # Dev and test results for each corpus. We don't need to save the results.
        for corpus in dev_dict_dataloader:
            dev_results = get_predictions(model,
                                          corpus,
                                          dev_dict_dataloader[corpus])
            # One results folder per mapping scheme and epoch.
            path_results = 'results/dev/pytorch_' + mapping_classes + '_' + str(epoch_num+1)
            if not os.path.exists(path_results):
                os.makedirs(path_results)
            print_results_to_file(corpus,
                                  dev_dict_sentences[corpus],
                                  dev_results,
                                  inv_mappings, #substitutions_file,
                                  path_results)

        # ------ Test --------

        print('\nTest results for epoch:', epoch_num + 1)
        for corpus in test_dict_dataloader:
            test_results = get_predictions(model,
                                           corpus,
                                           test_dict_dataloader[corpus])
            path_results = 'results/test/pytorch_' + mapping_classes + '_' + str(epoch_num+1)
            if not os.path.exists(path_results):
                os.makedirs(path_results)
            print_results_to_file(corpus,
                                  test_dict_sentences[corpus],
                                  test_results,
                                  inv_mappings, #substitutions_file,
                                  path_results)

        # # we want the results of specific epochs for specific corpora.
        # # we define the epochs and the corpora and we save only these results.
        # if epoch_num+1 in specific_results:
        #     for corpus in specific_results[epoch_num+1]:
        #         test_results = get_predictions(model,
        #                                        corpus,
        #                                        test_dict_dataloader[corpus],
        #                                        print_results=False)
# ========= New Code! =============
# Save for each epoch the dev and test results

# ------- Start the training -------

print('\nModel: ', args.transformer_model)
# Effective batch size = per-step batch size x accumulation steps.
print('Batch size: ', args.batch_size * args.gradient_accumulation_steps)
print('\nStart training...\n')

train(model,
      train_dataloader,
      dev_dict_dataloader,
      test_dict_sentences,
      test_dict_dataloader,
      args.num_epochs,
      # specific_results
      )

print('\nTraining Done!')

# ------- Testing ---------

# print('Testing...')
# for corpus in test_dict_dataloader:
#     test_results = get_predictions(model,
#                                    corpus,
#                                    test_dict_dataloader[corpus]
#                                    )
#     print_results_to_file(corpus,
#                           test_dict_sentences[corpus],
#                           test_results,
#                           inv_mappings,
#                           substitutions_file)
\ No newline at end of file
adapter-transformers==3.0.1
certifi==2023.5.7
charset-normalizer
cmake==3.26.3
datasets==2.4.0
fsspec
huggingface-hub==0.14.1
idna==3.4
Jinja2==3.1.2
joblib==1.2.0
lit==16.0.3
MarkupSafe==2.1.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.13
networkx==3.1
packaging==23.1
pandas==2.0.1
Pillow==9.5.0
pyarrow==12.0.0
python-dateutil==2.8.2
pytz==2023.3
PyYAML==6.0
regex==2023.5.5
requests==2.30.0
responses==0.18.0
sacremoses==0.0.53
scikit-learn==1.2.2
scipy==1.10.1
six==1.16.0
sympy==1.12
threadpoolctl==3.1.0
tokenizers==0.12.1
torch==2.0.1
torchaudio==2.0.2
torchvision
tqdm==4.65.0
transformers==4.18.0
triton==2.0.0
typing_extensions==4.5.0
tzdata==2023.3
urllib3==2.0.2
xxhash==3.2.0
yarl==1.9.2
\ No newline at end of file
#!/usr/bin/env bash
# SLURM launcher for adapter training / classifier runs (one RTX6000 GPU).
# Only the 'layer.1' adapter-training invocation is active below; the other
# srun lines are kept commented as a record of the remaining experiments.
#SBATCH --job-name=adapters
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --partition=RTX6000Node
#SBATCH --gres=gpu:1

# tests tests

# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 pytorch_classifier.py --batch_size 8 --num_epochs 10 --data_path '/users/melodi/emetheni/clean_data' --mappings_file 'mappings/mappings-classes-braud.tsv'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 huggingface_classifier.py --batch_size 4 --gradient_accumulation_steps 32 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --mappings_file 'mappings/mappings-classes-braud.tsv'

# Train the adapter:

# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1;layer.2;layer.3' --mappings_file 'mappings/mappings-classes-braud.tsv'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1;layer.2;layer.3;layer.4' --mappings_file 'mappings/mappings-classes-braud.tsv'

srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1' --mappings_file 'mappings/mappings-classes-braud.tsv'

# Run classifier with adapter for corpora:

# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 2 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 3 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 4 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 5 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 6 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3'

# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 2 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 3 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 4 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 5 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 6 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1'
utils.py 0 → 100644
#!/usr/bin/env python
# coding: utf-8

# Shared helpers: mapping/corpus file I/O, prediction and result-writing utils.

import os
import torch
from transformers import AutoConfig, AutoTokenizer
from configure import parse_args
import numpy as np
from sklearn.metrics import accuracy_score

args = parse_args()
def open_mappings(mappings_file):
    """Read a tab-separated mappings file (header row skipped).

    Each data row is: label<TAB>...<TAB>integer-id.
    Returns a pair of dicts: (label -> id, id -> label).
    """
    label_to_id = {}
    with open(mappings_file, 'r') as handle:
        next(handle)  # skip the header row
        for row in handle:
            key = row.split('\t')[0]
            label_to_id[key] = int(row.strip().split('\t')[-1])
    id_to_label = {num: lab for lab, num in label_to_id.items()}
    return label_to_id, id_to_label
def open_file(filename, mappings_dict):
    ''' Function to open a .rels file.
    Arguments:
    - filename: the path to a .rels file
    - mappings_dict: a dictionary of mappings of unique labels to integers
    Returns a list of lists, where each list is:
    the line + [two sentences combined with special BERT token, encoded label]
    '''
    max_len = 254  # 512 (max bert len) / 2 (2 sents) -2 (special tokens)
    lines = []
    SEP_token = '[SEP]'

    with open(filename, 'r', encoding='utf-8') as f:
        next(f)  # skip the header row
        for line in f:
            l = line.strip().split('\t')
            if len(l) > 1:
                # chop the sentences to max_len if too long
                sent_1 = l[3].split(' ')
                sent_2 = l[4].split(' ')
                if len(sent_1) > max_len:
                    sent_1 = sent_1[:max_len]
                if len(sent_2) > max_len:
                    sent_2 = sent_2[:max_len]

                # flip them if different direction (column 9 is '1>2' or '1<2')
                if args.normalize_direction == 'yes':
                    if l[9] == '1>2':
                        lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]])
                    else:
                        lines.append(l + [sent_2 + [SEP_token] + sent_1, mappings_dict[l[11].lower()]])
                else:
                    # BUG FIX: this branch referenced the undefined global name
                    # 'mappings' instead of the 'mappings_dict' parameter, which
                    # raised NameError whenever args.normalize_direction != 'yes'.
                    lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]])

    return lines
def encode_batch(batch):
    """ Encodes a batch of input data using the model tokenizer.
    Works for a pandas DF column, instead of a list.
    """
    # PERF FIX: load the tokenizer once and cache it on the function object.
    # The original re-instantiated AutoTokenizer.from_pretrained on every
    # call, which is needlessly slow when mapping over a whole dataset.
    if not hasattr(encode_batch, '_tokenizer'):
        encode_batch._tokenizer = AutoTokenizer.from_pretrained(args.transformer_model)
    return encode_batch._tokenizer(batch["text"],
                                   max_length=512,
                                   truncation=True,
                                   padding="max_length"
                                   )
def _find_rels_file(path_to_corpora, corpus, split):
    ''' Return the path of the first .rels file for a split ('train'/'dev'/'test').
    Raises IndexError if the corpus folder has no such file.
    '''
    return ['/'.join([path_to_corpora, corpus, x])
            for x in os.listdir(path_to_corpora + '/' + corpus)
            if split in x and 'rels' in x][0]


def open_sentences(path_to_corpora, mappings_dict):
    ''' Opens all the corpora and the surprise corpora in train/dev/test sets.
    Uses the open_file() function from utils.
    Returns:
    - list of sentences for TRAIN: all the corpora and surprise corpora together
    - dict of sentences for DEV: each dev set categorized per corpus
    - dict of sentences for TEST: each test set categorized per corpus
    '''
    # Every sub-folder is a corpus, except repo housekeeping entries.
    corpora = [folder for folder in os.listdir(path_to_corpora)
               if not any(i in folder for i in ['.md', 'DS_', 'utils', 'ipynb'])]

    train_sentences = []
    dev_dict_sentences = {}
    test_dict_sentences = {}

    for corpus in corpora:
        try:
            train_sentences += open_file(
                _find_rels_file(path_to_corpora, corpus, 'train'),
                mappings_dict)
        except IndexError:
            # ROBUSTNESS FIX: some corpora ship without a train split; only
            # that case (no matching file) is swallowed now, instead of the
            # original bare 'except' that hid every error.
            pass

        # open each dev set separately, keyed by corpus
        dev_dict_sentences[corpus] = []
        dev_dict_sentences[corpus] += open_file(
            _find_rels_file(path_to_corpora, corpus, 'dev'),
            mappings_dict)

        # open each test set separately, keyed by corpus
        test_dict_sentences[corpus] = []
        test_dict_sentences[corpus] += open_file(
            _find_rels_file(path_to_corpora, corpus, 'test'),
            mappings_dict)

    return train_sentences, dev_dict_sentences, test_dict_sentences
# ===============
# Testing functions
# ===============
def get_predictions(model,
                    corpus,
                    test_dataloader,
                    print_results=True):
    ''' Function to get the model's predictions for one corpus' test set.
    Can print accuracy using scikit-learn.
    Also works with dev sets -- just don't save the outputs.
    Returns: list of predictions that match test file's lines.
    '''
    device = torch.device("cuda" if args.use_cuda else "cpu")
    if args.use_cuda:
        model = model.cuda()

    model.eval()

    all_labels = []
    all_preds = []

    # CLEANUP: removed dead locals from the original (test_loss/test_accuracy
    # were never updated; 'logits' was computed and detached but never used).
    with torch.no_grad():
        for test_input, test_label in test_dataloader:
            mask = test_input['attention_mask'].to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)
            output = model(input_id, mask)
            # gold labels and argmax predictions, collected as CPU lists
            all_labels += test_label.to('cpu').numpy().tolist()
            all_preds += output.argmax(dim=1).tolist()

    assert len(all_labels) == len(all_preds)
    test_acc = round(accuracy_score(all_labels, all_preds), 4)

    if print_results:
        print(corpus, '\tAccuracy:\t', test_acc)

    return all_preds
def get_predictions_huggingface(trainer,
                                corpus,
                                test_set,
                                print_results=True):
    ''' Specific variant of get_predictions for the HuggingFace Trainer.
    Gets the trainer's predictions for one corpus' test set.
    Can print accuracy using scikit-learn.
    Also works with dev sets -- just don't save the outputs.
    Returns: list of predictions that match test file's lines.
    '''
    prediction_output = trainer.predict(test_set)
    preds = np.argmax(prediction_output.predictions, axis=1)
    gold = prediction_output.label_ids
    test_acc = round(accuracy_score(preds, gold), 4)

    if print_results:
        print(corpus, '\tAccuracy:\t', test_acc, '\n')

    return preds
def print_results_to_file(corpus,
                          test_sentences,
                          test_results,
                          inv_mappings_dict,
                          #substitutions_file,
                          output_folder):
    ''' Function to print a new file with the test predictions per
    the specifications of the Shared task: each input line's fields,
    followed by the decoded predicted label as a final column.
    Returns: one file per corpus with predictions.
    '''
    column_names = ['doc',
                    'unit1_toks',
                    'unit2_toks',
                    'unit1_txt',
                    'unit2_txt',
                    's1_toks',
                    's2_toks',
                    'unit1_sent',
                    'unit2_sent',
                    'dir',
                    'orig_label',
                    'label',
                    'predicted_label']
    header = '\t'.join(column_names)

    # save the results in a separate folder, one file per corpus
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # decode each integer prediction and append it to a copy of its line
    results_to_write = []
    for idx, sent in enumerate(test_sentences):
        decoded = inv_mappings_dict[test_results[idx]]
        results_to_write.append(sent[:] + [decoded])

    assert len(results_to_write) == len(test_sentences)

    with open(output_folder + '/' + corpus + '.tsv', 'a+', encoding='utf-8') as out:
        out.write(header + '\n')
        for row in results_to_write:
            out.write('\t'.join(str(field) for field in row))
            out.write('\n')
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment