diff --git a/.gitignore b/.gitignore index 83f4102d196bd47a28cbaef208f9482b4ce645c7..c77fc10d654fa51f5e38a01633db0daa1f301666 100644 --- a/.gitignore +++ b/.gitignore @@ -144,5 +144,6 @@ logs/ *.log *.out wandb/ +results/ training_output/ diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2cd9cc0150f3bff606062cff485ec89d5da9e607 --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# DiscReT: Discourse Relation tagging +The MELODI team submission for Task 3 of the DISRPT 2023 shared task. + +## Contents +* **mappings**: a folder with the label conversions we implemented, and specifications of which test results are produced by which of our models. +* **pytorch_classifier.py**: the bare classifier using mBERT-base-cased and built on PyTorch +* **make_adapter.py**: code to create a classifier adapter, based on [AdapterHub](https://github.com/adapter-hub/adapter-transformers) +* **adapter_classifier.py**: classifier using one of the trained adapters (training the adapter beforehand is required) +* **requirements.txt**: list of dependencies +* **train_classifiers.sh**: shell script to train all classifiers +* **configure.py**: list of training arguments +* **utils.py**: various functions + +## Installation +* Pull the data from the [DISRPT Shared Task repository](https://github.com/disrpt/sharedtask2023): + ``` + git clone https://github.com/disrpt/sharedtask2023 + ``` + +* Install the requirements, either with pip: + ``` + pip install -r requirements.txt + ``` + or by creating a conda environment: + ``` + conda env create -f environment.yml + conda activate discret + ``` + +## Running classifiers + +The results are created by three different models: +* the **bare classifier**: an mBERT-base-cased model (max. 6 epochs) +* the **classifier with A1 adapter**: an mBERT-base-cased model trained for 3 epochs with an adapter that was itself trained with mBERT-base-cased for 15 epochs, with layer 1 frozen +* the **classifier with A1-3 adapter**: an mBERT-base-cased model trained for 4 epochs with an adapter that was itself trained with mBERT-base-cased for 15 epochs, with layers 1-3 frozen + +Run either the **train_classifiers.sh** script or each script individually (the adapters must be trained before running the adapter classifiers): + +### Bare classifier +``` +python pytorch_classifier.py \ + --num_epochs 6 \ + --data_path [PATH_TO_DATA] +``` +### Adapter training + +A1: +``` +python make_adapter.py \ + --num_epochs 15 \ + --freeze_layers 'layer.1' \ + --data_path [PATH_TO_DATA] +``` +A1-3: +``` +python make_adapter.py \ + --num_epochs 15 \ + --freeze_layers 'layer.1;layer.2;layer.3' \ + --data_path [PATH_TO_DATA] +``` +### Classifiers with adapter +With A1: +``` +python adapter_classifier.py \ + --num_epochs 3 \ + --data_path [PATH_TO_DATA] \ + --adapter_name 'adapter_15-epochs_frozen-1' +``` +With A1-3: +``` +python adapter_classifier.py \ + --num_epochs 4 \ + --data_path [PATH_TO_DATA] \ + --adapter_name 'adapter_15-epochs_frozen-1-2-3' +``` diff --git a/adapter_classifier.py b/adapter_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..28eb3b131afe8850da00edc504dfce4d22b00e5b --- /dev/null +++ b/adapter_classifier.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# coding: utf-8 + +import torch +import numpy as np +from transformers import AutoModel, AutoTokenizer, get_linear_schedule_with_warmup, AutoAdapterModel, AutoModelWithHeads, AutoConfig, TrainingArguments, Trainer, EvalPrediction, set_seed +from torch import nn +from torch.optim import AdamW +from torch.utils.data import DataLoader +import torch.nn.functional as F +from 
torch.autograd import Variable +from tqdm import tqdm +import os +from time import sleep +from datetime import datetime +import sys +from sklearn.metrics import classification_report, accuracy_score +from utils import open_file +import pandas as pd +import datasets +from configure import parse_args +from utils import * + +args = parse_args() +now = datetime.now() +dt_string = now.strftime("%d.%m.%y-%H:%M:%S") +adapter_name = args.adapter_name +mappings, inv_mappings = open_mappings(args.mappings_file) +substitutions_file = 'mappings/substitutions.txt' +tokenizer = AutoTokenizer.from_pretrained(args.transformer_model) + +# we are saving the test results of specific epochs +# specific_results = open_specific_results('mappings/specific_results.txt') +# if '1-2-3' in adapter_name or 'layer1;layer2;layer3' in adapter_name: +# specific_results = list(specific_results['A1_3'][args.num_epochs]) +# else: +# specific_results = list(specific_results['A1'][args.num_epochs]) + +set_seed(42) + +print('Train classifier with adapter\n') +print('Adapter name:', adapter_name) +print('Model:', args.transformer_model) +print('Batch size:', args.batch_size * args.gradient_accumulation_steps) +print('Num epochs:', args.num_epochs) + +# Open mappings +mappings, inv_mappings = open_mappings(args.mappings_file) + +# Open sentences +train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings) + +# make pandas dataframes +file_header = ['text', 'labels'] + +train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences], + columns =file_header) +train_df = train_df.sample(frac = 1) # shuffle the train + +dev_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] + for x in sents], + columns = file_header) + for corpus, sents in dev_dict_sentences.items()} + +test_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] + for x in sents], + columns = file_header) + for corpus, sents in test_dict_sentences.items()} + +#Make datasets from dataframes +train_dataset = datasets.Dataset.from_pandas(train_df) +dev_dict_dataset = {corpus:datasets.Dataset.from_pandas(dev_df) + for corpus, dev_df in dev_dict_df.items()} +test_dict_dataset = {corpus:datasets.Dataset.from_pandas(dev_df) + for corpus, dev_df in test_dict_df.items()} + +# get number of labels +num_labels = len(set([int(x.strip()) + for x in train_df['labels'].to_string(index=False).split('\n')])) +1 + +# Encode the data +train_dataset = train_dataset.map(encode_batch, batched=True) +train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + +encoded_dev_dataset = {} +for corpus in dev_dict_dataset: + temp = dev_dict_dataset[corpus].map(encode_batch, batched=True) + temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + encoded_dev_dataset[corpus] = temp + +encoded_test_dataset = {} +for corpus in test_dict_dataset: + temp = test_dict_dataset[corpus].map(encode_batch, batched=True) + temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + encoded_test_dataset[corpus] = temp + +# =============================== +# Training params +# =============================== + +model = AutoAdapterModel.from_pretrained(args.transformer_model) +active_adapter = model.load_adapter(adapter_name, + config = adapter_name + "/adapter_config.json") +model.set_active_adapters(active_adapter) + + +training_args = TrainingArguments( + learning_rate = 2e-5, #1e-4, + num_train_epochs = args.num_epochs, + per_device_train_batch_size = 
args.batch_size, + per_device_eval_batch_size = args.batch_size, + gradient_accumulation_steps = args.gradient_accumulation_steps, + logging_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)), + output_dir = "./training_output", + overwrite_output_dir =True, + remove_unused_columns=False, +) + + +trainer = Trainer( + model = model, + args = training_args, + train_dataset = train_dataset +) + +# Freeze layers in the classifier if desired +if args.freeze_layers != '': + layers_to_freeze = args.freeze_layers.split(';') + for name, param in model.named_parameters(): + if any(x in name for x in layers_to_freeze): + param.requires_grad = False + + +# =============================== +# Start the training 🚀 +# =============================== + +print('Start training...') +trainer.train() + +# Dev results + +print('\nDev results:') +for corpus in encoded_dev_dataset: + print() + dev_results = get_predictions_huggingface(trainer, corpus, + encoded_dev_dataset[corpus]) + + + path_results = 'results/dev/' + adapter_name + '_' + str(args.num_epochs) + if not os.path.exists(path_results): + os.makedirs(path_results) + + print_results_to_file(corpus, + dev_dict_sentences[corpus], + dev_results, + inv_mappings, + substitutions_file, + path_results) + +# Test results + +print('\ntest results:') +for corpus in encoded_test_dataset: + print() + test_results = get_predictions_huggingface(trainer, + corpus, + encoded_test_dataset[corpus]) + + + path_results = 'results/test/' + adapter_name + '_' + str(args.num_epochs) + if not os.path.exists(path_results): + os.makedirs(path_results) + + print_results_to_file(corpus, + test_dict_sentences[corpus], + test_results, + inv_mappings, + substitutions_file, + path_results) + + + +# for corpus in test_dict_dataloader: +# test_results = get_predictions(model, +# corpus, +# test_dict_dataloader[corpus]) + +# path_results = 'results/test/pytorch' + str(epoch_num+1) +# if not os.path.exists(path_results): +# os.makedirs(path_results) + +# print_results_to_file(corpus, +# test_dict_sentences[corpus], +# test_results, +# inv_mappings, substitutions_file, +# path_results) + + + + + + + +# Save specific test results + +# print('\nTest results:') +# for corpus in encoded_test_dataset: +# print() +# test_results = get_predictions_huggingface(trainer, corpus, +# encoded_test_dataset[corpus]) +# +# print_results_to_file(corpus, test_dict_sentences[corpus], test_results, +# inv_mappings, substitutions_file) \ No newline at end of file diff --git a/configure.py b/configure.py new file mode 100644 index 0000000000000000000000000000000000000000..89e8c0fb51a766c90915459f82e5335db16486ec --- /dev/null +++ b/configure.py @@ -0,0 +1,54 @@ +import argparse +import sys + +def parse_args(): + """ + Parse input arguments. + """ + parser = argparse.ArgumentParser() + + parser.add_argument("--data_path", default="../sharedtask2023/data", type=str, + help="The path to the shared task data file from Github.") + + parser.add_argument("--mappings_file", default="mappings/mappings_substitutions.tsv", type=str, + help="The mappings file for all relations.") + + # transformer model + parser.add_argument("--transformer_model", default="bert-base-multilingual-cased", type=str, + help="Model used, default: bert-multilingual-base-cased") + + # Number of training epochs + parser.add_argument("--num_epochs", default=4, type=int, + help="Number of training epochs. 
Default: 4") + + # Number of gradient accumulation steps + parser.add_argument("--gradient_accumulation_steps", default=16, type=int, + help="Number of gradient accumulation steps. Default: 16") + + # Dropout + parser.add_argument("--dropout", default=0.1, type=float, + help="Dropout.") + + # Batch size + parser.add_argument("--batch_size", default=8, type=int, + help="With CUDA: max. 8, without: max. 16. Default: 8") + + # Use CUDA + parser.add_argument("--use_cuda", default='yes', type=str, + help="Use CUDA [yes/no]. Careful of batch size!") + + # freeze layers + parser.add_argument("--freeze_layers", default='', type=str, + help="List of layer(s) to freeze, a str separated by ;. Example: 'layer.1;layer.2'") + + # load adapter + parser.add_argument("--adapter_name", default='', type=str, + help="If you want to use an adapter") + + # normalize direction + parser.add_argument("--normalize_direction", default='yes', type=str, + help="Change order of sentences when the direction of relations is 1<2 to 2>1.") + + args = parser.parse_args() + + return args \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..bcc7776ff1b87b6e69601ce6ecaf1a138f07b1bb --- /dev/null +++ b/environment.yml @@ -0,0 +1,88 @@ +name: discret +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2023.01.10=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1t=h7f8727e_0 + - pip=23.0.1=py38h06a4308_0 + - python=3.8.16=h7a1cb2a_3 + - readline=8.2=h5eee18b_0 + - setuptools=66.0.0=py38h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.38.4=py38h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - adapter-transformers==3.0.1 + - aiohttp==3.8.4 + - aiosignal==1.3.1 + - async-timeout==4.0.2 + - attrs==23.1.0 + - certifi==2023.5.7 + - charset-normalizer==3.1.0 + - click==8.1.3 + - cmake==3.26.3 + - datasets==2.4.0 + - dill==0.3.5.1 + - filelock==3.12.0 + - frozenlist==1.3.3 + - fsspec==2023.5.0 + - huggingface-hub==0.14.1 + - idna==3.4 + - jinja2==3.1.2 + - joblib==1.2.0 + - lit==16.0.3 + - markupsafe==2.1.2 + - mpmath==1.3.0 + - multidict==6.0.4 + - multiprocess==0.70.13 + - networkx==3.1 + - numpy==1.24.3 + - nvidia-cublas-cu11==11.10.3.66 + - nvidia-cuda-cupti-cu11==11.7.101 + - nvidia-cuda-nvrtc-cu11==11.7.99 + - nvidia-cuda-runtime-cu11==11.7.99 + - nvidia-cudnn-cu11==8.5.0.96 + - nvidia-cufft-cu11==10.9.0.58 + - nvidia-curand-cu11==10.2.10.91 + - nvidia-cusolver-cu11==11.4.0.1 + - nvidia-cusparse-cu11==11.7.4.91 + - nvidia-nccl-cu11==2.14.3 + - nvidia-nvtx-cu11==11.7.91 + - packaging==23.1 + - pandas==2.0.1 + - pillow==9.5.0 + - pyarrow==12.0.0 + - python-dateutil==2.8.2 + - pytz==2023.3 + - pyyaml==6.0 + - regex==2023.5.5 + - requests==2.30.0 + - responses==0.18.0 + - sacremoses==0.0.53 + - scikit-learn==1.2.2 + - scipy==1.10.1 + - six==1.16.0 + - sympy==1.12 + - threadpoolctl==3.1.0 + - tokenizers==0.12.1 + - torch==2.0.1 + - torchaudio==2.0.2 + - torchvision==0.15.2 + - tqdm==4.65.0 + - transformers==4.18.0 + - triton==2.0.0 + - typing-extensions==4.5.0 + - tzdata==2023.3 + - urllib3==2.0.2 + - xxhash==3.2.0 + - yarl==1.9.2ename \ No newline at end of file diff --git a/huggingface_classifier.py b/huggingface_classifier.py new file mode 100644 index 
0000000000000000000000000000000000000000..7d90d9acd4232a9bb52a2ffdeb346f5596bc452e --- /dev/null +++ b/huggingface_classifier.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# coding: utf-8 + +import torch +import numpy as np +from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, AutoConfig, TrainingArguments, Trainer, EvalPrediction, set_seed +from torch import nn +from torch.optim import AdamW +from torch.utils.data import DataLoader +import torch.nn.functional as F +from torch.autograd import Variable +from tqdm import tqdm +import os +from time import sleep +from datetime import datetime +import sys +from sklearn.metrics import classification_report, accuracy_score +from utils import open_file +import pandas as pd +import datasets +from configure import parse_args +from utils import * + +args = parse_args() +now = datetime.now() +dt_string = now.strftime("%d.%m.%y-%H:%M:%S") +save_name = args.mappings_file.split('-')[-1] +mappings, inv_mappings = open_mappings(args.mappings_file) +substitutions_file = 'mappings/substitutions.txt' +tokenizer = AutoTokenizer.from_pretrained(args.transformer_model) + + +set_seed(42) + +print('Model:', args.transformer_model) +print('Batch size:', args.batch_size * args.gradient_accumulation_steps) +print('Num epochs:', args.num_epochs) + +# Open mappings +mappings, inv_mappings = open_mappings(args.mappings_file) + +# Open sentences +train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings) + +# make pandas dataframes +file_header = ['text', 'labels'] + +train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences], + columns =file_header) +train_df = train_df.sample(frac = 1) # shuffle the train + +dev_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] + for x in sents], + columns = file_header) + for corpus, sents in dev_dict_sentences.items()} + +test_dict_df = {corpus : pd.DataFrame([[' '.join(x[-2]), x[-1]] + for x in sents], + columns = file_header) + for corpus, sents in test_dict_sentences.items()} + +#Make datasets from dataframes +train_dataset = datasets.Dataset.from_pandas(train_df) +dev_dict_dataset = {corpus:datasets.Dataset.from_pandas(dev_df) + for corpus, dev_df in dev_dict_df.items()} +test_dict_dataset = {corpus:datasets.Dataset.from_pandas(dev_df) + for corpus, dev_df in test_dict_df.items()} + +# get number of labels +num_labels = len(set([int(x.strip()) + for x in train_df['labels'].to_string(index=False).split('\n')])) +1 + +# Encode the data +train_dataset = train_dataset.map(encode_batch, batched=True) +train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + +encoded_dev_dataset = {} +for corpus in dev_dict_dataset: + temp = dev_dict_dataset[corpus].map(encode_batch, batched=True) + temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + encoded_dev_dataset[corpus] = temp + +encoded_test_dataset = {} +for corpus in test_dict_dataset: + temp = test_dict_dataset[corpus].map(encode_batch, batched=True) + temp.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + encoded_test_dataset[corpus] = temp + +# =============================== +# Training params +# =============================== + +model = AutoModelForSequenceClassification.from_pretrained(args.transformer_model) + + +training_args = TrainingArguments( + learning_rate = 2e-5, #1e-4, + num_train_epochs = args.num_epochs, + per_device_train_batch_size = 
args.batch_size, + per_device_eval_batch_size = args.batch_size, + gradient_accumulation_steps = args.gradient_accumulation_steps, + logging_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)), + output_dir = "./training_output", + overwrite_output_dir =True, + remove_unused_columns=False, +) + + +trainer = Trainer( + model = model, + args = training_args, + train_dataset = train_dataset +) + +# Freeze layers in the classifier if desired +if args.freeze_layers != '': + layers_to_freeze = args.freeze_layers.split(';') + for name, param in model.named_parameters(): + if any(x in name for x in layers_to_freeze): + param.requires_grad = False + + +# =============================== +# Start the training 🚀 +# =============================== + +print('Start training...') +trainer.train() + +# Dev results + +print('\nDev results:') +for corpus in encoded_dev_dataset: + print() + dev_results = get_predictions_huggingface(trainer, corpus, + encoded_dev_dataset[corpus]) + + + path_results = 'results/dev/' + save_name + '_' + str(args.num_epochs) + if not os.path.exists(path_results): + os.makedirs(path_results) + + print_results_to_file(corpus, + dev_dict_sentences[corpus], + dev_results, + inv_mappings, + #substitutions_file, + path_results) + +# Test results + +print('\ntest results:') +for corpus in encoded_test_dataset: + print() + test_results = get_predictions_huggingface(trainer, + corpus, + encoded_test_dataset[corpus]) + + + path_results = 'results/test/' + save_name + '_' + str(args.num_epochs) + if not os.path.exists(path_results): + os.makedirs(path_results) + + print_results_to_file(corpus, + test_dict_sentences[corpus], + test_results, + inv_mappings, + substitutions_file, + path_results) + + + +# for corpus in test_dict_dataloader: +# test_results = get_predictions(model, +# corpus, +# test_dict_dataloader[corpus]) + +# path_results = 'results/test/pytorch' + str(epoch_num+1) +# if not os.path.exists(path_results): +# os.makedirs(path_results) + +# print_results_to_file(corpus, +# test_dict_sentences[corpus], +# test_results, +# inv_mappings, substitutions_file, +# path_results) + + + + + + + +# Save specific test results + +# print('\nTest results:') +# for corpus in encoded_test_dataset: +# print() +# test_results = get_predictions_huggingface(trainer, corpus, +# encoded_test_dataset[corpus]) +# +# print_results_to_file(corpus, test_dict_sentences[corpus], test_results, +# inv_mappings, substitutions_file) \ No newline at end of file diff --git a/make_adapter.py b/make_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..3cb8c11097203dbeaac68d97c78fb9f97c35013f --- /dev/null +++ b/make_adapter.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# coding: utf-8 + +import os +import numpy as np +from datetime import datetime +import pandas as pd +import torch +from transformers import AutoModel, AutoTokenizer, AutoModelWithHeads, AutoConfig, TrainingArguments, AdapterTrainer, EvalPrediction, set_seed +import datasets +from configure import parse_args +from sklearn.metrics import accuracy_score +from utils import * + +# parameters +args = parse_args() +tokenizer = AutoTokenizer.from_pretrained(args.transformer_model) +layers_to_freeze = args.freeze_layers.split(';') +set_seed(42) +batch_size = args.batch_size +mapping_classes = args.mappings_file[:-4].split('-')[-1] + +# Set name for adapter +adapter_name = 'A_' + str(args.num_epochs) + '-F_' + args.freeze_layers.replace('layer.', '-').replace(';', '') + '-M_' + mapping_classes + 
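+# Illustrative example of the naming scheme above (assuming these argument values): with +# --num_epochs 15, --freeze_layers 'layer.1;layer.2;layer.3' and --mappings_file mappings/mappings-classes-rst.tsv, +# adapter_name becomes 'A_15-F_-1-2-3-M_rst'; model.save_adapter() below writes the adapter to a +# directory of this name, which adapter_classifier.py then takes as its --adapter_name.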
+print('Create classifier adapter\n') +print('Name:', adapter_name) +print('Model:', args.transformer_model) +print('Batch size:', args.batch_size * args.gradient_accumulation_steps) +print('Frozen layers:', args.freeze_layers.replace(';', ', ')) + +# Open mappings +mappings, inv_mappings = open_mappings(args.mappings_file) + +# Open sentences +train_sentences, dev_dict_sentences, _ = open_sentences(args.data_path, mappings) + +# make pandas dataframes +file_header = ['text', 'labels'] +train_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] for x in train_sentences], columns=file_header) +train_df = train_df.sample(frac = 1) # shuffle the train +# get a global dev accuracy, we will not be directly using these results +dev_df = pd.DataFrame([[' '.join(x[-2]), x[-1]] + for sents in dev_dict_sentences.values() + for x in sents ], columns=file_header) + +#Make datasets from dataframes +train_dataset = datasets.Dataset.from_pandas(train_df) +dev_dataset = datasets.Dataset.from_pandas(dev_df) + +# get number of labels +num_labels = len(set([int(x.strip()) + for x in train_df['labels'].to_string(index=False).split('\n')])) +1 + +# Encode the data +train_dataset = train_dataset.map(encode_batch, batched=True) +train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + +dev_dataset = dev_dataset.map(encode_batch, batched=True) +dev_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"]) + + +# Training + +config = AutoConfig.from_pretrained( + args.transformer_model, + num_labels=num_labels, +) +model = AutoModelWithHeads.from_pretrained( + args.transformer_model, + config=config, +) + +# Add a new adapter +model.add_adapter(adapter_name) +# Add a matching classification head +model.add_classification_head( + adapter_name, + num_labels=num_labels, + id2label=inv_mappings + ) + +# Activate the adapter +print('Initialize adapter...') +model.train_adapter(adapter_name) + +training_args = TrainingArguments( + learning_rate = 1e-4, + num_train_epochs = args.num_epochs, + per_device_train_batch_size = args.batch_size, + per_device_eval_batch_size = args.batch_size, + gradient_accumulation_steps = args.gradient_accumulation_steps, + logging_steps = (len(train_sentences)/(args.batch_size * args.gradient_accumulation_steps)), + output_dir = "./training_output", + overwrite_output_dir =True, + remove_unused_columns=False, +) + +trainer = AdapterTrainer( + model=model, + args=training_args, + train_dataset=train_dataset, +) + +# freeze layers +if args.freeze_layers != '': + for name, param in model.named_parameters(): + if any(x in name for x in layers_to_freeze): + param.requires_grad = False + +# Start the training 🚀 +print('\nStart training...\n') +trainer.train() + +# Save adapter to load for the finetuned model +model.save_adapter(adapter_name, adapter_name) + +# Perform evaluation +results = trainer.predict(dev_dataset) +preds = np.argmax(results.predictions, axis=1) +results = results.label_ids +print('Dev accuracy:', round(accuracy_score(preds, results), 4)) \ No newline at end of file diff --git a/mappings/mappings-classes-braud.tsv b/mappings/mappings-classes-braud.tsv new file mode 100644 index 0000000000000000000000000000000000000000..f4ca27dfd29916320dc5f7b4662c0cecc2ca4a64 --- /dev/null +++ b/mappings/mappings-classes-braud.tsv @@ -0,0 +1,163 @@ +LABEL CLASS MAPPING +adversative contrast 1 +adversative-antithesis contrast 1 +adversative-concession contrast 1 +adversative-contrast contrast 1 +alternative condition 10 +antithesis contrast 1 
+attribution attribution 2 +attribution-negative attribution 2 +attribution-positive attribution 2 +background background 8 +causal cause 6 +causal-cause cause 6 +causal-result cause 6 +cause cause 6 +cause-effect cause 6 +circumstance background 8 +comparison comparison 12 +concession contrast 1 +conclusion evaluation 13 +condition condition 10 +conjunction joint 16 +context explanation 15 +context-background background 8 +context-circumstance background 8 +contingency condition 10 +contingency-condition condition 10 +contrast contrast 1 +disjunction same-unit 11 +e-elaboration elaboration 3 +effect cause 6 +elaboration elaboration 3 +elaboration-additional elaboration 3 +elaboration-attribute elaboration 3 +enablement enablement 14 +evaluation evaluation 13 +evaluation-comment evaluation 13 +evaluation-n evaluation 13 +evaluation-s evaluation 13 +evidence explanation 15 +explanation explanation 15 +explanation-evidence explanation 15 +explanation-justify explanation 15 +explanation-motivation explanation 15 +interpretation evaluation 13 +interpretation-evaluation evaluation 13 +joint joint 16 +joint-disjunction joint 16 +joint-list joint 16 +joint-other joint 16 +joint-sequence temporal 17 +justify explanation 15 +list joint 16 +manner-means manner-means 4 +means manner-means 4 +mode manner-means 4 +mode-manner manner-means 4 +mode-means manner-means 4 +motivation explanation 15 +nonvolitional-cause cause 6 +nonvolitional-cause-e cause 6 +nonvolitional-result cause 6 +nonvolitional-result-e cause 6 +organization textual-organization 0 +organization-heading textual-organization 0 +organization-phatic textual-organization 0 +organization-preparation textual-organization 0 +otherwise condition 10 +parenthetical same-unit 11 +preparation background 8 +purpose enablement 14 +purpose-attribute enablement 14 +purpose-goal enablement 14 +reason explanation 15 +restatement summary 5 +restatement-mn summary 5 +restatement-partial summary 5 +restatement-repetition summary 5 +result cause 6 +sequence temporal 17 +solutionhood topic-comment 7 +summary summary 5 +temporal temporal 17 +textual-organization textual-organization 0 +topic topic-comment 7 +topic-change topic-change 9 +topic-comment topic-comment 7 +topic-drift topic-change 9 +topic-question topic-comment 7 +topic-solutionhood topic-comment 7 +unconditional condition 10 +unless condition 10 +volitional-cause cause 6 +volitional-result cause 6 +causation cause 6 +comparison.concession contrast 1 +comparison.concession+speechact comparison 12 +comparison.contrast contrast 1 +comparison.degree comparison 12 +comparison.similarity comparison 12 +conditional condition 10 +contingency.cause condition 10 +contingency.cause+belief condition 10 +contingency.cause+speechact condition 10 +contingency.condition condition 10 +contingency.condition+speechact condition 10 +contingency.goal condition 10 +contingency.negative-cause cause 6 +contingency.negative-condition condition 10 +contingency.purpose enablement 14 +expansion elaboration 3 +expansion.alternative condition 10 +expansion.conjunction joint 16 +expansion.correction contrast 1 +expansion.disjunction cause 6 +expansion.equivalence comparison 12 +expansion.exception contrast 1 +expansion.instantiation elaboration 3 +expansion.level-of-detail elaboration 3 +expansion.manner manner-means 4 +expansion.restatement summary 5 +expansion.substitution contrast 1 +hypophora topic-comment 7 +interrupted topic-change 9 +progression temporal 17 +repetition elaboration 3 +temporal.asynchronous temporal 17 
+temporal.synchronous temporal 17 +temporal.synchrony temporal 17 +qap topic-comment 7 +contingency.negative-condition+speechact condition 10 +contingency.negative condition 10 +expansion.genexpansion elaboration 3 +expansion.level elaboration 3 +qap.hypophora topic-comment 7 +bg-compare background 8 +bg-general background 8 +bg-goal background 8 +cause-result cause 6 +elab-addition elaboration 3 +elab-aspect elaboration 3 +elab-definition elaboration 3 +elab-enumember elaboration 3 +elab-example elaboration 3 +elab-process_step elaboration 3 +exp-evidence explanation 15 +exp-reason explanation 15 +findings cause 6 +acknowledgement attribution 2 +alternation condition 10 +clarification_question topic-comment 7 +comment evaluation 13 +continuation joint 16 +correction contrast 1 +explanation* explanation 15 +flashback explanation 15 +frame explanation 15 +goal enablement 14 +narration elaboration 3 +parallel joint 16 +q_elab elaboration 3 +question_answer_pair topic-comment 7 +temploc temporal 17 diff --git a/mappings/mappings-classes-rst.tsv b/mappings/mappings-classes-rst.tsv new file mode 100644 index 0000000000000000000000000000000000000000..8938d6b4e12bbe7ff1f0cd1b474c9fa5da1518bf --- /dev/null +++ b/mappings/mappings-classes-rst.tsv @@ -0,0 +1,163 @@ +LABEL CLASS MAPPING +adversative contrast 1 +adversative-antithesis contrast 1 +adversative-concession contrast 1 +adversative-contrast contrast 1 +alternative condition 10 +antithesis contrast 1 +attribution attribution 2 +attribution-negative attribution 2 +attribution-positive attribution 2 +background background 8 +causal cause 6 +causal-cause cause 6 +causal-result cause 6 +cause cause 6 +cause-effect cause 6 +circumstance background 8 +comparison comparison 11 +concession contrast 1 +conclusion evaluation 12 +condition condition 10 +conjunction joint 15 +context background 8 +context-background background 8 +context-circumstance background 8 +contingency condition 10 +contingency-condition condition 10 +contrast contrast 1 +disjunction joint 15 +e-elaboration elaboration 3 +effect cause 6 +elaboration elaboration 3 +elaboration-additional elaboration 3 +elaboration-attribute elaboration 3 +enablement enablement 13 +evaluation evaluation 12 +evaluation-comment evaluation 12 +evaluation-n evaluation 12 +evaluation-s evaluation 12 +evidence explanation 14 +explanation explanation 14 +explanation-evidence explanation 14 +explanation-justify explanation 14 +explanation-motivation explanation 14 +interpretation evaluation 12 +interpretation-evaluation evaluation 12 +joint joint 15 +joint-disjunction joint 15 +joint-list joint 15 +joint-other joint 15 +joint-sequence temporal 16 +justify explanation 14 +list joint 15 +manner-means manner-means 4 +means manner-means 4 +mode manner-means 4 +mode-manner manner-means 4 +mode-means manner-means 4 +motivation explanation 14 +nonvolitional-cause cause 6 +nonvolitional-cause-e cause 6 +nonvolitional-result cause 6 +nonvolitional-result-e cause 6 +organization background 8 +organization-heading background 8 +organization-phatic background 8 +organization-preparation background 8 +otherwise condition 10 +parenthetical elaboration 3 +preparation background 8 +purpose enablement 13 +purpose-attribute enablement 13 +purpose-goal enablement 13 +reason explanation 14 +restatement summary 5 +restatement-mn summary 5 +restatement-partial summary 5 +restatement-repetition summary 5 +result cause 6 +sequence temporal 16 +solutionhood topic-comment 7 +summary summary 5 +temporal temporal 16 
+textual-organization background 8 +topic topic-comment 7 +topic-change topic-change 9 +topic-comment topic-comment 7 +topic-drift topic change 0 +topic-question topic-comment 7 +topic-solutionhood topic-comment 7 +unconditional condition 10 +unless condition 10 +volitional-cause cause 6 +volitional-result cause 6 +causation cause 6 +comparison.concession contrast 1 +comparison.concession+speechact comparison 11 +comparison.contrast contrast 1 +comparison.degree comparison 11 +comparison.similarity comparison 11 +conditional condition 10 +contingency.cause condition 10 +contingency.cause+belief condition 10 +contingency.cause+speechact condition 10 +contingency.condition condition 10 +contingency.condition+speechact condition 10 +contingency.goal condition 10 +contingency.negative-cause cause 6 +contingency.negative-condition condition 10 +contingency.purpose enablement 13 +expansion elaboration 3 +expansion.alternative condition 10 +expansion.conjunction joint 15 +expansion.correction contrast 1 +expansion.disjunction joint 15 +expansion.equivalence comparison 11 +expansion.exception contrast 1 +expansion.instantiation elaboration 3 +expansion.level-of-detail elaboration 3 +expansion.manner manner-means 4 +expansion.restatement summary 5 +expansion.substitution contrast 1 +hypophora topic-comment 7 +interrupted topic-change 9 +progression temporal 16 +repetition elaboration 3 +temporal.asynchronous temporal 16 +temporal.synchronous temporal 16 +temporal.synchrony temporal 16 +qap topic-comment 7 +contingency.negative-condition+speechact condition 10 +contingency.negative condition 10 +expansion.genexpansion elaboration 3 +expansion.level elaboration 3 +qap.hypophora topic-comment 7 +bg-compare background 8 +bg-general background 8 +bg-goal background 8 +cause-result cause 6 +elab-addition elaboration 3 +elab-aspect elaboration 3 +elab-definition elaboration 3 +elab-enumember elaboration 3 +elab-example elaboration 3 +elab-process_step elaboration 3 +exp-evidence explanation 14 +exp-reason explanation 14 +findings cause 6 +acknowledgement attribution 2 +alternation condition 10 +clarification_question topic-comment 7 +comment evaluation 12 +continuation joint 15 +correction contrast 1 +explanation* explanation 14 +flashback explanation 14 +frame explanation 14 +goal enablement 13 +narration elaboration 3 +parallel joint 15 +q_elab elaboration 3 +question_answer_pair topic-comment 7 +temploc temporal 16 diff --git a/mappings/mappings_substitutions.tsv b/mappings/mappings_substitutions.tsv new file mode 100644 index 0000000000000000000000000000000000000000..52b18b508dc6caa76872254d3c80698e8497399a --- /dev/null +++ b/mappings/mappings_substitutions.tsv @@ -0,0 +1,163 @@ +mode-means 0 +expansion.restatement 1 +expansion.substitution 2 +bg-compare 3 +root 4 +organization-preparation 5 +topic-solutionhood 6 +evaluation-n 7 +contingency.negative-cause 8 +organization 9 +causal 10 +elab-enumember 11 +organization-phatic 12 +purpose-attribute 13 +mode 14 +temporal 15 +contingency.cause+belief 16 +means 17 +expansion 18 +comparison.concession+speechact 19 +parallel 20 +contingency.condition 21 +context-circumstance 22 +restatement-partial 23 +expansion.equivalence 24 +interrupted 25 +contingency.negative-condition 26 +comment 27 +organization-heading 28 +joint-other 29 +result 30 +expansion.alternative 31 +parenthetical 32 +clarification_question 33 +background 34 +conjunction 77 +nonvolitional-result-e 36 +manner-means 37 +elaboration-additional 38 +attribution 39 +volitional-result 40 
+contingency.negative 41 +mode-manner 42 +expansion.level-of-detail 43 +topic-comment 44 +joint-sequence 45 +elab-addition 46 +explanation* 47 +comparison.similarity 48 +reason 49 +solutionhood 50 +nonvolitional-cause 51 +contingency.negative-condition+speechact 52 +topic-question 53 +elab-definition 54 +hypophora 55 +adversative 56 +elaboration-attribute 57 +nonvolitional-result 58 +joint 59 +bg-goal 60 +contrast 61 +explanation-justify 62 +context-background 63 +topic-drift 64 +contingency.purpose 65 +explanation 66 +elaboration 67 +elab-example 68 +evaluation-comment 69 +continuation 70 +exp-reason 71 +interpretation 72 +conclusion 73 +attribution-negative 74 +flashback 75 +frame 76 +expansion.conjunction 77 +preparation 78 +temporal.asynchronous 79 +attribution-positive 80 +acknowledgement 81 +comparison.contrast 82 +condition 83 +contingency.goal 84 +restatement-repetition 85 +temploc 86 +adversative-contrast 87 +topic-change 88 +context 89 +effect 90 +expansion.correction 91 +contingency.cause 92 +progression 93 +evaluation-s 94 +explanation-evidence 95 +volitional-cause 96 +concession 97 +expansion.exception 98 +summary 99 +comparison.degree 100 +adversative-concession 101 +comparison 102 +topic 103 +expansion.instantiation 104 +purpose-goal 105 +evaluation 106 +expansion.disjunction 107 +explanation-motivation 108 +nonvolitional-cause-e 109 +question_answer_pair 110 +restatement-mn 111 +contingency.cause+speechact 112 +cause-effect 113 +purpose 114 +enablement 115 +cause 116 +e-elaboration 117 +contingency.condition+speechact 118 +interpretation-evaluation 119 +adversative-antithesis 120 +antithesis 121 +expansion.manner 122 +comparison.concession 123 +narration 124 +contingency-condition 125 +contingency 126 +temporal.synchronous 127 +circumstance 128 +q_elab 129 +causal-cause 130 +joint-list 131 +elab-aspect 132 +elab-process_step 133 +causal-result 134 +alternation 31 +conditional 83 +goal 105 +correction 91 +alternative 31 +disjunction 107 +evidence 95 +justify 62 +list 131 +motivation 108 +restatement 1 +sequence 45 +unless 61 +causation 116 +bg-general 34 +exp-evidence 95 +otherwise 56 +unconditional 107 +joint-disjunction 107 +repetition 85 +temporal.synchrony 127 +textual-organization 9 +cause-result 113 +findings 30 +qap 110 +expansion.level 43 +qap.hypophora 55 +expansion.genexpansion 18 diff --git a/mappings/specific_results.txt b/mappings/specific_results.txt new file mode 100644 index 0000000000000000000000000000000000000000..46f7a3786a570956d9e5a8b37204a80d501b10e4 --- /dev/null +++ b/mappings/specific_results.txt @@ -0,0 +1,27 @@ +BEST EPOCH Corpus +B 3 deu.rst.pcc +A1_3 4 eng.dep.covdtb +B 4 eng.dep.scidtb +B 4 eng.pdtb.pdtb +A1_3 4 eng.pdtb.tedm +A1_3 4 eng.rst.gum +B 5 eng.rst.rstdt +A1_3 4 eng.sdrt.stac +A1_3 4 eus.rst.ert +B 3 fas.rst.prstc +A1_3 4 fra.sdrt.annodis +A1_3 4 ita.pdtb.luna +A1_3 4 nld.rst.nldt +A1 3 por.pdtb.crpc +A1_3 4 por.pdtb.tedm +A1_3 4 por.rst.cstn +A1_3 4 rus.rst.rrt +A1_3 4 spa.rst.rststb +B 5 spa.rst.sctb +A1_3 4 tha.pdtb.tdtb +B 3 tur.pdtb.tdb +A1_3 4 tur.pdtb.tedm +A1 3 zho.dep.scidtb +B 4 zho.pdtb.cdtb +A1 3 zho.rst.gcdt +A1_3 4 zho.rst.sctb \ No newline at end of file diff --git a/mappings/substitions-classes-braud.tsv b/mappings/substitions-classes-braud.tsv new file mode 100644 index 0000000000000000000000000000000000000000..ae4b1fd5d5249f2af6313cbf0b3fca01c4d287a0 --- /dev/null +++ b/mappings/substitions-classes-braud.tsv @@ -0,0 +1,163 @@ +LABEL CLASS +adversative contrast +adversative-antithesis contrast +adversative-concession 
contrast +adversative-contrast contrast +alternative condition +antithesis contrast +attribution attribution +attribution-negative attribution +attribution-positive attribution +background background +causal cause +causal-cause cause +causal-result cause +cause cause +cause-effect cause +circumstance background +comparison comparison +concession contrast +conclusion evaluation +condition condition +conjunction joint +context explanation +context-background background +context-circumstance background +contingency condition +contingency-condition condition +contrast contrast +disjunction same-unit +e-elaboration elaboration +effect cause +elaboration elaboration +elaboration-additional elaboration +elaboration-attribute elaboration +enablement enablement +evaluation evaluation +evaluation-comment evaluation +evaluation-n evaluation +evaluation-s evaluation +evidence explanation +explanation explanation +explanation-evidence explanation +explanation-justify explanation +explanation-motivation explanation +interpretation evaluation +interpretation-evaluation evaluation +joint joint +joint-disjunction joint +joint-list joint +joint-other joint +joint-sequence temporal +justify explanation +list joint +manner-means manner-means +means manner-means +mode manner-means +mode-manner manner-means +mode-means manner-means +motivation explanation +nonvolitional-cause cause +nonvolitional-cause-e cause +nonvolitional-result cause +nonvolitional-result-e cause +organization textual-organization +organization-heading textual-organization +organization-phatic textual-organization +organization-preparation textual-organization +otherwise condition +parenthetical same-unit +preparation background +purpose enablement +purpose-attribute enablement +purpose-goal enablement +reason explanation +restatement summary +restatement-mn summary +restatement-partial summary +restatement-repetition summary +result cause +sequence temporal +solutionhood topic-comment +summary summary +temporal temporal +textual-organization textual-organization +topic topic-comment +topic-change topic-change +topic-comment topic-comment +topic-drift topic-change +topic-question topic-comment +topic-solutionhood topic-comment +unconditional condition +unless condition +volitional-cause cause +volitional-result cause +causation cause +comparison.concession contrast +comparison.concession+speechact comparison +comparison.contrast contrast +comparison.degree comparison +comparison.similarity comparison +conditional condition +contingency.cause condition +contingency.cause+belief condition +contingency.cause+speechact condition +contingency.condition condition +contingency.condition+speechact condition +contingency.goal condition +contingency.negative-cause cause +contingency.negative-condition condition +contingency.purpose enablement +expansion elaboration +expansion.alternative condition +expansion.conjunction joint +expansion.correction contrast +expansion.disjunction cause +expansion.equivalence comparison +expansion.exception contrast +expansion.instantiation elaboration +expansion.level-of-detail elaboration +expansion.manner manner-means +expansion.restatement summary +expansion.substitution contrast +hypophora topic-comment +interrupted topic-change +progression temporal +repetition elaboration +temporal.asynchronous temporal +temporal.synchronous temporal +temporal.synchrony temporal +qap topic-comment +contingency.negative-condition+speechact condition +contingency.negative condition +expansion.genexpansion elaboration 
+expansion.level elaboration +qap.hypophora topic-comment +bg-compare background +bg-general background +bg-goal background +cause-result cause +elab-addition elaboration +elab-aspect elaboration +elab-definition elaboration +elab-enumember elaboration +elab-example elaboration +elab-process_step elaboration +exp-evidence explanation +exp-reason explanation +findings cause +acknowledgement attribution +alternation condition +clarification_question topic-comment +comment evaluation +continuation joint +correction contrast +explanation* explanation +flashback explanation +frame explanation +goal enablement +narration elaboration +parallel joint +q_elab elaboration +question_answer_pair topic-comment +temploc temporal diff --git a/mappings/substitions-classes-rst.tsv b/mappings/substitions-classes-rst.tsv new file mode 100644 index 0000000000000000000000000000000000000000..64485b9c0d9fac1701d0d614846210762a872f74 --- /dev/null +++ b/mappings/substitions-classes-rst.tsv @@ -0,0 +1,163 @@ +LABEL CLASS +adversative contrast +adversative-antithesis contrast +adversative-concession contrast +adversative-contrast contrast +alternative condition +antithesis contrast +attribution attribution +attribution-negative attribution +attribution-positive attribution +background background +causal cause +causal-cause cause +causal-result cause +cause cause +cause-effect cause +circumstance background +comparison comparison +concession contrast +conclusion evaluation +condition condition +conjunction joint +context background +context-background background +context-circumstance background +contingency condition +contingency-condition condition +contrast contrast +disjunction joint +e-elaboration elaboration +effect cause +elaboration elaboration +elaboration-additional elaboration +elaboration-attribute elaboration +enablement enablement +evaluation evaluation +evaluation-comment evaluation +evaluation-n evaluation +evaluation-s evaluation +evidence explanation +explanation explanation +explanation-evidence explanation +explanation-justify explanation +explanation-motivation explanation +interpretation evaluation +interpretation-evaluation evaluation +joint joint +joint-disjunction joint +joint-list joint +joint-other joint +joint-sequence temporal +justify explanation +list joint +manner-means manner-means +means manner-means +mode manner-means +mode-manner manner-means +mode-means manner-means +motivation explanation +nonvolitional-cause cause +nonvolitional-cause-e cause +nonvolitional-result cause +nonvolitional-result-e cause +organization background +organization-heading background +organization-phatic background +organization-preparation background +otherwise condition +parenthetical elaboration +preparation background +purpose enablement +purpose-attribute enablement +purpose-goal enablement +reason explanation +restatement summary +restatement-mn summary +restatement-partial summary +restatement-repetition summary +result cause +sequence temporal +solutionhood topic-comment +summary summary +temporal temporal +textual-organization background +topic topic-comment +topic-change topic-change +topic-comment topic-comment +topic-drift topic change +topic-question topic-comment +topic-solutionhood topic-comment +unconditional condition +unless condition +volitional-cause cause +volitional-result cause +causation cause +comparison.concession contrast +comparison.concession+speechact comparison +comparison.contrast contrast +comparison.degree comparison +comparison.similarity comparison +conditional condition 
+contingency.cause condition +contingency.cause+belief condition +contingency.cause+speechact condition +contingency.condition condition +contingency.condition+speechact condition +contingency.goal condition +contingency.negative-cause cause +contingency.negative-condition condition +contingency.purpose enablement +expansion elaboration +expansion.alternative condition +expansion.conjunction joint +expansion.correction contrast +expansion.disjunction joint +expansion.equivalence comparison +expansion.exception contrast +expansion.instantiation elaboration +expansion.level-of-detail elaboration +expansion.manner manner-means +expansion.restatement summary +expansion.substitution contrast +hypophora topic-comment +interrupted topic-change +progression temporal +repetition elaboration +temporal.asynchronous temporal +temporal.synchronous temporal +temporal.synchrony temporal +qap topic-comment +contingency.negative-condition+speechact condition +contingency.negative condition +expansion.genexpansion elaboration +expansion.level elaboration +qap.hypophora topic-comment +bg-compare background +bg-general background +bg-goal background +cause-result cause +elab-addition elaboration +elab-aspect elaboration +elab-definition elaboration +elab-enumember elaboration +elab-example elaboration +elab-process_step elaboration +exp-evidence explanation +exp-reason explanation +findings cause +acknowledgement attribution +alternation condition +clarification_question topic-comment +comment evaluation +continuation joint +correction contrast +explanation* explanation +flashback explanation +frame explanation +goal enablement +narration elaboration +parallel joint +q_elab elaboration +question_answer_pair topic-comment +temploc temporal diff --git a/mappings/substitutions.txt b/mappings/substitutions.txt new file mode 100644 index 0000000000000000000000000000000000000000..4fbb7f93846b3599ade1a59263d318e325522763 --- /dev/null +++ b/mappings/substitutions.txt @@ -0,0 +1,751 @@ +ORIGINAL-LABEL CORPUS SUBSTITUTION +disjunction deu.rst.pcc expansion.disjunction +evidence deu.rst.pcc explanation-evidence +list deu.rst.pcc joint-list +restatement deu.rst.pcc expansion.restatement +sequence deu.rst.pcc joint-sequence +condition deu.rst.pcc conditional +contrast deu.rst.pcc unless +conjunction deu.rst.pcc expansion.conjunction +background deu.rst.pcc bg-general +cause deu.rst.pcc causation +ATTRIBUTION eng.dep.covdtb attribution +BACKGROUND eng.dep.covdtb background +CAUSE-RESULT eng.dep.covdtb cause-effect +COMPARISON eng.dep.covdtb comparison +CONDITION eng.dep.covdtb condition +ELABORATION eng.dep.covdtb elaboration +ENABLEMENT eng.dep.covdtb enablement +FINDINGS eng.dep.covdtb result +JOINT eng.dep.covdtb joint +MANNER-MEANS eng.dep.covdtb manner-means +TEMPORAL eng.dep.covdtb temporal +TEXTUAL-ORGANIZATION eng.dep.covdtb organization +TEMPORAL eng.dep.covdtb temporal.synchrony +ELABORATION eng.dep.covdtb context +JOINT eng.dep.covdtb expansion.conjunction +CAUSE-RESULT eng.dep.covdtb causation +ELABORATION eng.dep.covdtb elab-aspect +MANNER-MEANS eng.dep.covdtb mode +ENABLEMENT eng.dep.covdtb contingency.purpose +JOINT eng.dep.covdtb joint-disjunction +CAUSE-RESULT eng.dep.covdtb contingency.cause +CONDITION eng.dep.covdtb conditional +TEXTUAL-ORGANIZATION eng.dep.covdtb textual-organization +COMPARISON eng.dep.covdtb unless +COMPARISON eng.dep.covdtb comparison.contrast +CAUSE-RESULT eng.dep.covdtb causal +FINDINGS eng.dep.covdtb evaluation +ELABORATION eng.dep.covdtb explanation +ELABORATION eng.dep.covdtb 
expansion.level +ELABORATION eng.dep.covdtb expansion.instantiation +ELABORATION eng.dep.covdtb elab-addition +COMPARISON eng.dep.covdtb otherwise +COMPARISON eng.dep.covdtb comparison.concession +TEMPORAL eng.dep.covdtb temporal.asynchronous +ELABORATION eng.dep.covdtb restatement +BACKGROUND eng.dep.covdtb bg-general +MANNER-MEANS eng.dep.covdtb expansion.manner +CONDITION eng.dep.covdtb contingency.condition +ENABLEMENT eng.dep.covdtb purpose +bg-general eng.dep.scidtb background +exp-evidence eng.dep.scidtb explanation-evidence +temporal eng.dep.scidtb context +joint eng.dep.scidtb expansion.conjunction +elab-addition eng.dep.scidtb contingency +cause eng.dep.scidtb causation +manner-means eng.dep.scidtb mode +cause eng.dep.scidtb contingency.cause +condition eng.dep.scidtb conditional +contrast eng.dep.scidtb unless +contrast eng.dep.scidtb comparison.contrast +elab-addition eng.dep.scidtb elaboration +elab-addition eng.dep.scidtb explanation +joint eng.dep.scidtb expansion.level +contrast eng.dep.scidtb otherwise +temporal eng.dep.scidtb temporal.asynchronous +temporal eng.dep.scidtb restatement +enablement eng.dep.scidtb purpose +Comparison.Concession eng.pdtb.pdtb comparison.concession +Comparison.Concession+SpeechAct eng.pdtb.pdtb comparison.concession+speechact +Comparison.Contrast eng.pdtb.pdtb comparison.contrast +Comparison.Similarity eng.pdtb.pdtb comparison.similarity +Contingency.Cause eng.pdtb.pdtb contingency.cause +Contingency.Cause+Belief eng.pdtb.pdtb contingency.cause+belief +Contingency.Cause+SpeechAct eng.pdtb.pdtb contingency.cause+speechact +Contingency.Condition eng.pdtb.pdtb contingency.condition +Contingency.Condition+SpeechAct eng.pdtb.pdtb contingency.condition+speechact +Contingency.Negative-cause eng.pdtb.pdtb contingency.negative-cause +Contingency.Negative-condition eng.pdtb.pdtb contingency.negative-condition +Contingency.Purpose eng.pdtb.pdtb contingency.purpose +Expansion.Conjunction eng.pdtb.pdtb expansion.conjunction +Expansion.Disjunction eng.pdtb.pdtb expansion.disjunction +Expansion.Equivalence eng.pdtb.pdtb expansion.equivalence +Expansion.Exception eng.pdtb.pdtb expansion.exception +Expansion.Instantiation eng.pdtb.pdtb expansion.instantiation +Expansion.Level-of-detail eng.pdtb.pdtb expansion.level-of-detail +Expansion.Manner eng.pdtb.pdtb expansion.manner +Expansion.Substitution eng.pdtb.pdtb expansion.substitution +Hypophora eng.pdtb.pdtb hypophora +Temporal.Asynchronous eng.pdtb.pdtb temporal.asynchronous +Temporal.Synchronous eng.pdtb.pdtb temporal.synchronous +Acknowledgement eng.pdtb.pdtb acknowledgement +Temporal.Synchronous eng.pdtb.pdtb temporal.synchrony +Hypophora eng.pdtb.pdtb qap.hypophora +Temporal.Synchronous eng.pdtb.pdtb context +Contingency.Condition eng.pdtb.pdtb contingency +Contingency.Purpose eng.pdtb.pdtb qap +Contingency.Negative-condition eng.pdtb.pdtb mode +Contingency.Condition eng.pdtb.pdtb enablement +expansion.disjunction eng.pdtb.pdtb joint-disjunction +Contingency.Condition eng.pdtb.pdtb conditional +Hypophora eng.pdtb.pdtb q_elab +Expansion.Conjunction eng.pdtb.pdtb joint +Expansion.Conjunction eng.pdtb.pdtb elaboration +Contingency.Cause eng.pdtb.pdtb causal +Contingency.Condition eng.pdtb.pdtb comparison +Contingency.Cause eng.pdtb.pdtb explanation +expansion.level-of-detail eng.pdtb.pdtb expansion.level +adversative eng.pdtb.pdtb otherwise +Expansion.Conjunction eng.pdtb.pdtb continuation +Contingency.Purpose eng.pdtb.pdtb purpose +Comparison.Concession eng.pdtb.tedm comparison.concession +Comparison.Contrast 
eng.pdtb.tedm comparison.contrast +Comparison.Similarity eng.pdtb.tedm comparison.similarity +Contingency.Cause eng.pdtb.tedm contingency.cause +Contingency.Cause+Belief eng.pdtb.tedm contingency.cause+belief +Contingency.Cause+SpeechAct eng.pdtb.tedm contingency.cause+speechact +Contingency.Condition eng.pdtb.tedm contingency.condition +Contingency.Purpose eng.pdtb.tedm contingency.purpose +Expansion.Conjunction eng.pdtb.tedm expansion.conjunction +Expansion.Disjunction eng.pdtb.tedm expansion.disjunction +Expansion.Equivalence eng.pdtb.tedm expansion.equivalence +Expansion.Instantiation eng.pdtb.tedm expansion.instantiation +Expansion.Level-of-detail eng.pdtb.tedm expansion.level-of-detail +Expansion.Manner eng.pdtb.tedm expansion.manner +Expansion.Substitution eng.pdtb.tedm expansion.substitution +Hypophora eng.pdtb.tedm hypophora +Temporal.Asynchronous eng.pdtb.tedm temporal.asynchronous +Temporal.Synchronous eng.pdtb.tedm temporal.synchronous +Temporal.Synchronous eng.pdtb.tedm temporal.synchrony +Temporal.Synchronous eng.pdtb.tedm context +Contingency.Condition eng.pdtb.tedm contingency +Expansion.Manner eng.pdtb.tedm mode +Expansion.Manner eng.pdtb.tedm enablement +Expansion.Disjunction eng.pdtb.tedm joint-disjunction +Contingency.Condition eng.pdtb.tedm conditional +Expansion.Conjunction eng.pdtb.tedm joint +Comparison.Contrast eng.pdtb.tedm unless +Contingency.Cause eng.pdtb.tedm textual-organization +Expansion.Level-of-detail eng.pdtb.tedm elaboration +Contingency.Cause eng.pdtb.tedm causal +Contingency.Cause eng.pdtb.tedm explanation +Expansion.Level-of-detail eng.pdtb.tedm expansion.level +Expansion.Manner eng.pdtb.tedm manner-means +Comparison.Contrast eng.pdtb.tedm otherwise +Contingency.Purpose eng.pdtb.tedm purpose +restatement eng.rst.gum expansion.restatement +elaboration eng.rst.gum acknowledgement +context eng.rst.gum temporal.synchrony +topic eng.rst.gum qap.hypophora +joint eng.rst.gum expansion.conjunction +causal eng.rst.gum causation +topic eng.rst.gum qap +purpose eng.rst.gum enablement +explanation eng.rst.gum expansion.substitution +purpose eng.rst.gum contingency.purpose +causal eng.rst.gum contingency.cause +contingency eng.rst.gum conditional +elaboration eng.rst.gum q_elab +organization eng.rst.gum textual-organization +adversative eng.rst.gum unless +adversative eng.rst.gum comparison.contrast +topic eng.rst.gum clarification_question +adversative eng.rst.gum comparison +elaboration eng.rst.gum expansion.level +mode eng.rst.gum manner-means +context eng.rst.gum expansion.instantiation +elaboration eng.rst.gum elab-addition +adversative eng.rst.gum otherwise +adversative eng.rst.gum comparison.concession +joint eng.rst.gum comparison.similarity +context eng.rst.gum temporal.asynchronous +context eng.rst.gum temporal +explanation eng.rst.gum continuation +explanation eng.rst.gum bg-general +mode eng.rst.gum expansion.manner +contingency eng.rst.gum contingency.condition +textual-organization eng.rst.rstdt organization +temporal.synchronous eng.rst.rstdt temporal.synchrony +hypophora eng.rst.rstdt qap.hypophora +circumstance eng.rst.rstdt context +joint eng.rst.rstdt expansion.conjunction +condition eng.rst.rstdt contingency +contrast eng.rst.rstdt expansion.substitution +enablement eng.rst.rstdt contingency.purpose +joint eng.rst.rstdt joint-disjunction +cause eng.rst.rstdt contingency.cause +condition eng.rst.rstdt conditional +condition eng.rst.rstdt contingency.negative-condition +contrast eng.rst.rstdt unless +comparison eng.rst.rstdt comparison.contrast 
+cause eng.rst.rstdt causal +topic-comment eng.rst.rstdt solutionhood +elaboration eng.rst.rstdt expansion.level +summary eng.rst.rstdt parenthetical +elaboration eng.rst.rstdt expansion.instantiation +elaboration eng.rst.rstdt elab-addition +adversative eng.rst.rstdt otherwise +comparison eng.rst.rstdt comparison.concession +comparison eng.rst.rstdt comparison.similarity +temporal eng.rst.rstdt temporal.asynchronous +expansion.restatement eng.rst.rstdt restatement +background eng.rst.rstdt bg-general +manner-means eng.rst.rstdt expansion.manner +condition eng.rst.rstdt contingency.condition +enablement eng.rst.rstdt purpose +Acknowledgement eng.sdrt.stac acknowledgement +Alternation eng.sdrt.stac expansion.alternative +Background eng.sdrt.stac background +Clarification_question eng.sdrt.stac clarification_question +Comment eng.sdrt.stac comment +Conditional eng.sdrt.stac condition +Continuation eng.sdrt.stac continuation +Contrast eng.sdrt.stac contrast +Correction eng.sdrt.stac expansion.correction +Elaboration eng.sdrt.stac elaboration +Explanation eng.sdrt.stac explanation +Narration eng.sdrt.stac narration +Parallel eng.sdrt.stac parallel +Q_Elab eng.sdrt.stac q_elab +Question_answer_pair eng.sdrt.stac question_answer_pair +Result eng.sdrt.stac result +Acknowledgement eng.sdrt.stac attribution +Elaboration eng.sdrt.stac context +Continuation eng.sdrt.stac contingency +Question_answer_pair eng.sdrt.stac qap +Elaboration eng.sdrt.stac contingency.cause +Conditional eng.sdrt.stac conditional +Continuation eng.sdrt.stac joint +Contrast eng.sdrt.stac unless +Continuation eng.sdrt.stac textual-organization +Result eng.sdrt.stac causal +Elaboration eng.sdrt.stac evaluation +Continuation eng.sdrt.stac expansion.level +Contrast eng.sdrt.stac otherwise +Elaboration eng.sdrt.stac temporal.asynchronous +Correction eng.sdrt.stac restatement +Conditional eng.sdrt.stac contingency.condition +disjunction eus.rst.ert expansion.disjunction +evidence eus.rst.ert explanation-evidence +justify eus.rst.ert explanation-justify +list eus.rst.ert joint-list +motivation eus.rst.ert explanation-motivation +otherwise eus.rst.ert adversative +restatement eus.rst.ert expansion.restatement +sequence eus.rst.ert joint-sequence +unconditional eus.rst.ert expansion.disjunction +unless eus.rst.ert contrast +condition eus.rst.ert conditional +preparation eus.rst.ert attribution +result eus.rst.ert findings +background eus.rst.ert bg-general +expansion.disjunction eus.rst.ert joint-disjunction +conjunction eus.rst.ert expansion.conjunction +cause eus.rst.ert causation +background eus.rst.ert bg-general +condition fas.rst.prstc conditional +elaboration fas.rst.prstc preparation +contrast fas.rst.prstc unless +joint fas.rst.prstc expansion.conjunction +background fas.rst.prstc bg-general +cause fas.rst.prstc causation +alternation fra.sdrt.annodis expansion.alternative +conditional fra.sdrt.annodis condition +goal fra.sdrt.annodis purpose-goal +elaboration fra.sdrt.annodis joint +contrast fra.sdrt.annodis unless +e-elaboration fra.sdrt.annodis purpose +Comparison ita.pdtb.luna comparison +Comparison.Concession ita.pdtb.luna comparison.concession +Comparison.Contrast ita.pdtb.luna comparison.contrast +Contingency.Cause ita.pdtb.luna contingency.cause +Contingency.Condition ita.pdtb.luna contingency.condition +Contingency.Goal ita.pdtb.luna contingency.goal +Expansion.Alternative ita.pdtb.luna expansion.alternative +Expansion.Conjunction ita.pdtb.luna expansion.conjunction +Expansion.Instantiation ita.pdtb.luna 
expansion.instantiation +Expansion.Restatement ita.pdtb.luna expansion.restatement +Interrupted ita.pdtb.luna interrupted +Repetition ita.pdtb.luna restatement-repetition +Temporal.Asynchronous ita.pdtb.luna temporal.asynchronous +Temporal.Synchrony ita.pdtb.luna temporal.synchronous +Temporal.Synchrony ita.pdtb.luna temporal.synchrony +Expansion.Conjunction ita.pdtb.luna joint +Contingency.Cause ita.pdtb.luna contingency.purpose +Expansion.Restatement ita.pdtb.luna restatement +Expansion.Disjunction ita.pdtb.luna joint-disjunction +disjunction nld.rst.nldt expansion.disjunction +evidence nld.rst.nldt explanation-evidence +justify nld.rst.nldt explanation-justify +list nld.rst.nldt joint-list +motivation nld.rst.nldt explanation-motivation +otherwise nld.rst.nldt adversative +restatement nld.rst.nldt expansion.restatement +sequence nld.rst.nldt joint-sequence +unconditional nld.rst.nldt expansion.disjunction +unless nld.rst.nldt contrast +condition nld.rst.nldt conditional +background nld.rst.nldt bg-general +conjunction nld.rst.nldt expansion.conjunction +disjunction nld.rst.nldt joint-disjunction +Comparison por.pdtb.crpc comparison +Comparison.Concession por.pdtb.crpc comparison.concession +Comparison.Contrast por.pdtb.crpc comparison.contrast +Comparison.Similarity por.pdtb.crpc comparison.similarity +Contingency.Cause por.pdtb.crpc contingency.cause +Contingency.Condition por.pdtb.crpc contingency.condition +Contingency.Negative por.pdtb.crpc contingency.negative +Contingency.Purpose por.pdtb.crpc contingency.purpose +Expansion.Conjunction por.pdtb.crpc expansion.conjunction +Expansion.Disjunction por.pdtb.crpc expansion.disjunction +Expansion.Equivalence por.pdtb.crpc expansion.equivalence +Expansion.Exception por.pdtb.crpc expansion.exception +Expansion.Instantiation por.pdtb.crpc expansion.instantiation +Expansion.Level por.pdtb.crpc expansion.level-of-detail +Expansion.Manner por.pdtb.crpc expansion.manner +Expansion.Substitution por.pdtb.crpc expansion.substitution +Hypophora por.pdtb.crpc hypophora +QAP por.pdtb.crpc question_answer_pair +QAP.Hypophora por.pdtb.crpc hypophora +Temporal por.pdtb.crpc temporal +Temporal.Asynchronous por.pdtb.crpc temporal.asynchronous +Temporal.Synchronous por.pdtb.crpc temporal.synchronous +Acknowledgement por.pdtb.crpc acknowledgement +Temporal.Synchronous por.pdtb.crpc temporal.synchrony +Expansion.Conjunction por.pdtb.crpc preparation +Expansion.Conjunction por.pdtb.crpc list +Contingency.Condition por.pdtb.crpc circumstance +Expansion.Disjunction por.pdtb.crpc joint-disjunction +Expansion.Conjunction por.pdtb.crpc joint +Expansion.Level por.pdtb.crpc textual-organization +Expansion.Conjunction por.pdtb.crpc elaboration +Expansion.Level por.pdtb.crpc expansion.level +Expansion.Level por.pdtb.crpc parenthetical +Contingency.Purpose por.pdtb.crpc purpose +Comparison.Concession por.pdtb.tedm comparison.concession +Comparison.Contrast por.pdtb.tedm comparison.contrast +Comparison.Similarity por.pdtb.tedm comparison.similarity +Contingency.Cause por.pdtb.tedm contingency.cause +Contingency.Cause+Belief por.pdtb.tedm contingency.cause+belief +Contingency.Condition por.pdtb.tedm contingency.condition +Contingency.Condition+SpeechAct por.pdtb.tedm contingency.condition+speechact +Contingency.Purpose por.pdtb.tedm contingency.purpose +Expansion.Conjunction por.pdtb.tedm expansion.conjunction +Expansion.Disjunction por.pdtb.tedm expansion.disjunction +Expansion.Equivalence por.pdtb.tedm expansion.equivalence +Expansion.Instantiation por.pdtb.tedm 
expansion.instantiation +Expansion.Level-of-detail por.pdtb.tedm expansion.level-of-detail +Expansion.Manner por.pdtb.tedm expansion.manner +Expansion.Substitution por.pdtb.tedm expansion.substitution +Hypophora por.pdtb.tedm hypophora +Temporal.Asynchronous por.pdtb.tedm temporal.asynchronous +Temporal.Synchronous por.pdtb.tedm temporal.synchronous +Contingency.Cause por.pdtb.tedm explanation +Expansion.Level-of-detail por.pdtb.tedm expansion.level +Temporal.Synchronous por.pdtb.tedm temporal.synchrony +Expansion.Level-of-detail por.pdtb.tedm elaboration +Expansion.Disjunction por.pdtb.tedm joint-disjunction +Expansion.Conjunction por.pdtb.tedm list +Contingency.Purpose por.pdtb.tedm purpose +evidence por.rst.cstn explanation-evidence +justify por.rst.cstn explanation-justify +list por.rst.cstn joint-list +motivation por.rst.cstn explanation-motivation +otherwise por.rst.cstn adversative +restatement por.rst.cstn expansion.restatement +sequence por.rst.cstn joint-sequence +elaboration por.rst.cstn expansion.level +condition por.rst.cstn conditional +circumstance por.rst.cstn temporal.asynchronous +list por.rst.cstn expansion.conjunction +elaboration por.rst.cstn contingency.cause +evidence rus.rst.rrt explanation-evidence +motivation rus.rst.rrt explanation-motivation +restatement rus.rst.rrt expansion.restatement +sequence rus.rst.rrt joint-sequence +condition rus.rst.rrt conditional +contrast rus.rst.rrt unless +joint rus.rst.rrt expansion.conjunction +background rus.rst.rrt bg-general +cause rus.rst.rrt causation +alternative spa.rst.rststb expansion.alternative +disjunction spa.rst.rststb expansion.disjunction +evidence spa.rst.rststb explanation-evidence +justify spa.rst.rststb explanation-justify +list spa.rst.rststb joint-list +motivation spa.rst.rststb explanation-motivation +restatement spa.rst.rststb expansion.restatement +sequence spa.rst.rststb joint-sequence +unless spa.rst.rststb contrast +condition spa.rst.rststb conditional +means spa.rst.rststb manner-means +result spa.rst.rststb findings +background spa.rst.rststb bg-general +cause spa.rst.rststb causation +disjunction spa.rst.sctb expansion.disjunction +evidence spa.rst.sctb explanation-evidence +justify spa.rst.sctb explanation-justify +list spa.rst.sctb joint-list +motivation spa.rst.sctb explanation-motivation +restatement spa.rst.sctb expansion.restatement +sequence spa.rst.sctb joint-sequence +condition spa.rst.sctb conditional +result spa.rst.sctb findings +list spa.rst.sctb joint +contrast spa.rst.sctb unless +background spa.rst.sctb bg-general +Comparison.Concession tha.pdtb.tdtb comparison.concession +Comparison.Contrast tha.pdtb.tdtb comparison.contrast +Comparison.Similarity tha.pdtb.tdtb comparison.similarity +Contingency.Cause tha.pdtb.tdtb contingency.cause +Contingency.Cause+Belief tha.pdtb.tdtb contingency.cause+belief +Contingency.Cause+SpeechAct tha.pdtb.tdtb contingency.cause+speechact +Contingency.Condition tha.pdtb.tdtb contingency.condition +Contingency.Negative-Condition tha.pdtb.tdtb contingency.negative-condition +Contingency.Negative-Condition+SpeechAct tha.pdtb.tdtb contingency.negative-condition+speechact +Contingency.Purpose tha.pdtb.tdtb contingency.purpose +Expansion.Conjunction tha.pdtb.tdtb expansion.conjunction +Expansion.Disjunction tha.pdtb.tdtb expansion.disjunction +Expansion.Equivalence tha.pdtb.tdtb expansion.equivalence +Expansion.Exception tha.pdtb.tdtb expansion.exception +Expansion.GenExpansion tha.pdtb.tdtb expansion +Expansion.Instantiation tha.pdtb.tdtb 
expansion.instantiation +Expansion.Level-of-detail tha.pdtb.tdtb expansion.level-of-detail +Expansion.Substitution tha.pdtb.tdtb expansion.substitution +Temporal.Asynchronous tha.pdtb.tdtb temporal.asynchronous +Temporal.Synchronous tha.pdtb.tdtb temporal.synchronous +Temporal.Synchronous tha.pdtb.tdtb temporal.synchrony +Expansion.GenExpansion tha.pdtb.tdtb expansion.genexpansion +Expansion.Disjunction tha.pdtb.tdtb joint-disjunction +Comparison.Concession tur.pdtb.tdb comparison.concession +Comparison.Concession+SpeechAct tur.pdtb.tdb comparison.concession+speechact +Comparison.Contrast tur.pdtb.tdb comparison.contrast +Comparison.Degree tur.pdtb.tdb comparison.degree +Comparison.Similarity tur.pdtb.tdb comparison.similarity +Contingency.Cause tur.pdtb.tdb contingency.cause +Contingency.Cause+Belief tur.pdtb.tdb contingency.cause+belief +Contingency.Cause+SpeechAct tur.pdtb.tdb contingency.cause+speechact +Contingency.Condition tur.pdtb.tdb contingency.condition +Contingency.Negative-condition tur.pdtb.tdb contingency.negative-condition +Contingency.Purpose tur.pdtb.tdb contingency.purpose +Expansion.Conjunction tur.pdtb.tdb expansion.conjunction +Expansion.Correction tur.pdtb.tdb expansion.correction +Expansion.Disjunction tur.pdtb.tdb expansion.disjunction +Expansion.Equivalence tur.pdtb.tdb expansion.equivalence +Expansion.Exception tur.pdtb.tdb expansion.exception +Expansion.Instantiation tur.pdtb.tdb expansion.instantiation +Expansion.Level-of-detail tur.pdtb.tdb expansion.level-of-detail +Expansion.Manner tur.pdtb.tdb expansion.manner +Expansion.Substitution tur.pdtb.tdb expansion.substitution +Hypophora tur.pdtb.tdb hypophora +Temporal.Asynchronous tur.pdtb.tdb temporal.asynchronous +Temporal.Synchronous tur.pdtb.tdb temporal.synchronous +Expansion.Level-of-detail tur.pdtb.tdb expansion.level +Temporal.Synchronous tur.pdtb.tdb temporal.synchrony +Hypophora tur.pdtb.tdb qap.hypophora +Expansion.Disjunction tur.pdtb.tdb joint-disjunction +Comparison.Concession tur.pdtb.tedm comparison.concession +Comparison.Concession+SpeechAct tur.pdtb.tedm comparison.concession+speechact +Comparison.Contrast tur.pdtb.tedm comparison.contrast +Comparison.Similarity tur.pdtb.tedm comparison.similarity +Contingency.Cause tur.pdtb.tedm contingency.cause +Contingency.Cause+Belief tur.pdtb.tedm contingency.cause+belief +Contingency.Cause+SpeechAct tur.pdtb.tedm contingency.cause+speechact +Contingency.Condition tur.pdtb.tedm contingency.condition +Contingency.Negative-condition tur.pdtb.tedm contingency.negative-condition +Contingency.Purpose tur.pdtb.tedm contingency.purpose +Expansion tur.pdtb.tedm expansion +Expansion.Conjunction tur.pdtb.tedm expansion.conjunction +Expansion.Disjunction tur.pdtb.tedm expansion.disjunction +Expansion.Equivalence tur.pdtb.tedm expansion.equivalence +Expansion.Exception tur.pdtb.tedm expansion.exception +Expansion.Instantiation tur.pdtb.tedm expansion.instantiation +Expansion.Level-of-detail tur.pdtb.tedm expansion.level-of-detail +Expansion.Manner tur.pdtb.tedm expansion.manner +Expansion.Substitution tur.pdtb.tedm expansion.substitution +Hypophora tur.pdtb.tedm hypophora +Temporal.Asynchronous tur.pdtb.tedm temporal.asynchronous +Temporal.Synchronous tur.pdtb.tedm temporal.synchronous +Expansion.Level-of-detail tur.pdtb.tedm expansion.level +Temporal.Synchronous tur.pdtb.tedm temporal.synchrony +Expansion.Disjunction tur.pdtb.tedm joint-disjunction +bg-general zho.dep.scidtb background +exp-evidence zho.dep.scidtb explanation-evidence +ROOT zho.dep.scidtb root 
+condition zho.dep.scidtb conditional +temporal zho.dep.scidtb context-circumstance +contrast zho.dep.scidtb unless +expansion zho.dep.scidtb expansion.genexpansion +elab-addition zho.dep.scidtb elaboration +Alternative zho.pdtb.cdtb expansion.alternative +Causation zho.pdtb.cdtb cause +Conditional zho.pdtb.cdtb condition +Conjunction zho.pdtb.cdtb conjunction +Contrast zho.pdtb.cdtb contrast +Expansion zho.pdtb.cdtb expansion +Progression zho.pdtb.cdtb progression +Purpose zho.pdtb.cdtb purpose +Temporal zho.pdtb.cdtb temporal +Progression zho.pdtb.cdtb topic-question +Conjunction zho.pdtb.cdtb list +Conditional zho.pdtb.cdtb contingency-condition +Conjunction zho.pdtb.cdtb expansion.conjunction +Causation zho.pdtb.cdtb causation +Expansion zho.pdtb.cdtb organization-heading +Expansion zho.pdtb.cdtb elaboration-additional +Purpose zho.pdtb.cdtb attribution-positive +Contrast zho.pdtb.cdtb joint-disjunction +Conditional zho.pdtb.cdtb conditional +Conditional zho.pdtb.cdtb context-circumstance +Conditional zho.pdtb.cdtb causal-cause +Contrast zho.pdtb.cdtb unless +Conjunction zho.pdtb.cdtb expansion.genexpansion +Contrast zho.pdtb.cdtb comparison.contrast +Conjunction zho.pdtb.cdtb elaboration +Conjunction zho.pdtb.cdtb sequence +Expansion zho.pdtb.cdtb exp-evidence +Contrast zho.pdtb.cdtb adversative-contrast +joint-disjunction zho.rst.gcdt expansion.disjunction +attribution-positive zho.rst.gcdt attribution +joint-list zho.rst.gcdt list +joint-list zho.rst.gcdt expansion.conjunction +cause zho.rst.gcdt causation +causal-result zho.rst.gcdt enablement +purpose-goal zho.rst.gcdt goal +condition zho.rst.gcdt conditional +adversative-concession zho.rst.gcdt concession +contrast zho.rst.gcdt unless +elaboration-additional zho.rst.gcdt expansion.genexpansion +elaboration-additional zho.rst.gcdt elaboration +adversative-antithesis zho.rst.gcdt comparison +elaboration-additional zho.rst.gcdt elab-addition +joint-sequence zho.rst.gcdt sequence +explanation-evidence zho.rst.gcdt exp-evidence +joint-other zho.rst.gcdt temporal +purpose-goal zho.rst.gcdt purpose +disjunction zho.rst.sctb expansion.disjunction +evidence zho.rst.sctb explanation-evidence +justify zho.rst.sctb explanation-justify +list zho.rst.sctb joint-list +motivation zho.rst.sctb explanation-motivation +restatement zho.rst.sctb expansion.restatement +sequence zho.rst.sctb joint-sequence +condition zho.rst.sctb conditional +circumstance zho.rst.sctb context-circumstance +means zho.rst.sctb manner-means +result zho.rst.sctb findings +elaboration zho.rst.sctb elaboration-attribute +list zho.rst.sctb joint +contrast zho.rst.sctb unless +evidence zho.rst.sctb exp-evidence +condition zho.rst.sctb contingency-condition +preparation zho.rst.sctb organization-heading +elaboration zho.rst.sctb expansion.genexpansion +elaboration zho.rst.sctb joint-disjunction +elaboration zho.rst.sctb expansion.conjunction +cause zho.rst.sctb causation +elaboration zho.rst.sctb elaboration-additional +evidence deu.rst.pcc exp-evidence +BACKGROUND eng.dep.covdtb bg-goal +COMPARISON eng.dep.covdtb comparison.similarity +CONDITION eng.dep.covdtb contingency +ELABORATION eng.dep.covdtb elab-enumember +ELABORATION eng.dep.covdtb elab-process_step +CAUSE-RESULT eng.dep.covdtb exp-reason +CAUSE-RESULT eng.dep.covdtb findings +TEXTUAL-ORGANIZATION eng.dep.covdtb preparation +ELABORATION eng.dep.covdtb summary +Comparison.Concession eng.pdtb.pdtb adversative +Temporal.Synchronous eng.pdtb.pdtb bg-general +Hypophora eng.pdtb.pdtb clarification_question 
+Temporal.Asynchronous eng.pdtb.pdtb temporal +Expansion.Conjunction eng.pdtb.tedm attribution +Temporal.Asynchronous eng.pdtb.tedm continuation +Expansion.Conjunction eng.pdtb.tedm evaluation +evaluation eng.rst.gum comment +elaboration-additional eng.rst.gum elab-enumember +causal-cause eng.rst.gum exp-reason +organization-preparation eng.rst.gum preparation +contrast eng.rst.rstdt adversative +cause eng.rst.rstdt causation +background eng.rst.rstdt circumstance +evaluation eng.rst.rstdt comment +summary eng.rst.rstdt expansion.restatement +topic-comment eng.rst.rstdt hypophora +means eng.rst.rstdt mode +topic-comment eng.rst.rstdt qap +temporal eng.rst.rstdt temporal.synchronous +temporal eng.rst.rstdt temporal.asynchronous +topic-comment eng.rst.rstdt topic +Correction eng.sdrt.stac expansion.conjunction +Continuation eng.sdrt.stac expansion.instantiation +Result eng.sdrt.stac findings +Question_answer_pair eng.sdrt.stac topic +background fra.sdrt.annodis bg-general +result fra.sdrt.annodis findings +Interrupted ita.pdtb.luna comment +Comparison ita.pdtb.luna comparison.similarity +Expansion.Resttatement ita.pdtb.luna Expansion.Disjunction +Expansion.Conjunction ita.pdtb.luna expansion.level +Contingency.Goal ita.pdtb.luna purpose +Repetition ita.pdtb.luna repetition +Expansion.Conjunction por.pdtb.crpc comment +Expansion.Conjunction por.pdtb.crpc sequence +Comparison.Contrast por.pdtb.tedm unless +Comparison.Contrast por.rst.cstn unless +evidence rus.rst.rrt exp-evidence +cause spa.rst.sctb causation +result zho.dep.scidtb findings +Contrast zho.pdtb.cdtb adversative-concession +Conditional zho.pdtb.cdtb context-background +Purpose zho.pdtb.cdtb goal +context-background zho.rst.gcdt bg-general +causal-cause zho.rst.gcdt cause +contingency-condition zho.rst.gcdt condition +adversative-contrast zho.rst.gcdt contrast +background zho.rst.sctb bg-general +elaboration zho.rst.sctb elab-addition +purpose zho.rst.sctb goal +means zho.rst.sctb mode-means +result deu.rst.pcc findings +BACKGROUND eng.dep.covdtb bg-compare +ELABORATION eng.dep.covdtb elab-example +CONDITION eng.dep.covdtb contingency.negative-condition +bg-goal eng.dep.scidtb contingency.purpose +cause eng.dep.scidtb causal +manner-means eng.dep.scidtb MANNER-MEANS +organization eng.dep.scidtb textual-organization +result eng.dep.scidtb findings +Expansion.Substitution eng.pdtb.pdtb restatement +Contingency.Condition eng.pdtb.pdtb Acknowledgement +Expansion.Conjunction eng.pdtb.pdtb evaluation +Contingency.Cause eng.pdtb.pdtb causation +Expansion.Conjunction eng.pdtb.pdtb textual-organization +expansion.restatement eng.pdtb.tedm restatement +Contingency.Cause eng.pdtb.tedm findings +Hypophora eng.pdtb.tedm qap.hypophora +elaboration eng.rst.gum elaboration-additional +topic eng.rst.gum topic-change +result eng.rst.gum findings +elaboration eng.rst.rstdt expansion.equivalence +textual-organization eng.rst.rstdt continuation +manner-means eng.rst.rstdt means +Alternation eng.sdrt.stac alternative +Correction eng.sdrt.stac expansion.substitution +Background eng.sdrt.stac bg-general +joint fas.rst.prstc textual-organization +frame fra.sdrt.annodis expansion.manner +organization fra.sdrt.annodis textual-organization +Expansion.Conjunction ita.pdtb.luna expansion.equivalence +Contingency.Cause ita.pdtb.luna attribution +Expansion.Restatement ita.pdtb.luna expansion.substitution +Expansion.Conjunction ita.pdtb.luna sequence +cause nld.rst.nldt causation +Contingency.Cause por.pdtb.crpc nonvolitional-cause +Temporal por.pdtb.crpc 
continuation +Comparison.Contrast por.pdtb.tedm comparison +Expansion.Conjunction por.pdtb.tedm sequence +parenthetical por.rst.cstn context +cause-effect rus.rst.rrt cause-result +Expansion.Conjunction spa.rst.rststb comparison +contrast spa.rst.sctb comparison +Expansion.Level-of-detail tur.pdtb.tdb restatement +manner-means zho.dep.scidtb mode-means +Conjunction zho.pdtb.cdtb joint-other +Conjunction zho.pdtb.cdtb evaluation +Conjunction zho.pdtb.cdtb evaluation-comment +Causation zho.pdtb.cdtb mode-means +Causation zho.pdtb.cdtb mode-manner +Expansion zho.pdtb.cdtb organization-preparation +result zho.rst.gcdt findings +cause zho.rst.sctb causal-result +preparation zho.rst.sctb organization-preparation +TEMPORAL eng.dep.covdtb progression +TEMPORAL eng.dep.covdtb continuation +TEXTUAL-ORGANIZATION eng.dep.scidtb organization +Expansion.Level-of-detail eng.pdtb.pdtb expansion.level-of-detail +Expansion.Disjunction eng.pdtb.pdtb expansion.disjunction +Expansion.Manner eng.pdtb.pdtb manner-means +Comparison.Contrast eng.pdtb.pdtb adversative +organization eng.rst.gum summary +restatement eng.rst.gum alternative +elaboration eng.rst.gum elab-process_step +elaboration eng.rst.gum elaboration-additional +causal eng.rst.gum result +restatement eng.rst.gum parallel +comparison eng.rst.rstdt adversative +temporal eng.rst.rstdt circumstance +textual-organization eng.rst.rstdt expansion.restatement +temporal eng.rst.rstdt temporal.synchronous +elaboration eng.rst.rstdt hypophora +manner-means eng.rst.rstdt means +Correction eng.sdrt.stac correction +disjunction eus.rst.ert expansion.disjunction +evidence eus.rst.ert exp-evidence +Expansion.Conjunction ita.pdtb.luna Expansion.Disjunction +explanation-evidence nld.rst.nldt exp-evidence +Expansion.Level por.pdtb.crpc interpretation +Hypophora por.pdtb.tedm qap.hypophora +Expansion.Conjunction por.pdtb.tedm joint +evidence por.rst.cstn exp-evidence +elaboration rus.rst.rrt elab-addition +evidence spa.rst.rststb exp-evidence +conjunction spa.rst.rststb Expansion.Conjunction +list spa.rst.sctb joint-disjunction +evidence spa.rst.sctb exp-evidence +attribution zho.dep.scidtb attribution-positive +joint zho.dep.scidtb expansion.conjunction +joint zho.dep.scidtb list +bg-general zho.dep.scidtb context-background +explanation-justify zho.pdtb.cdtb justify +contingency-condition zho.rst.gcdt condition +explanation-justify zho.rst.gcdt justify +causal-cause zho.rst.gcdt cause +adversative-contrast zho.rst.gcdt contrast +concession zho.rst.sctb adversative-concession +contrast zho.rst.sctb adversative-contrast +Expansion.Level-of-detail tha.pdtb.tdtb expansion.level +volitional-cause por.rst.cstn causation +comparison por.rst.cstn Comparison.Contrast +Elaboration eng.sdrt.stac mode +Elaboration eng.sdrt.stac expansion.manner +list zho.rst.sctb joint-other +evidence nld.rst.nldt explanation-evidence +Comparison.Concession por.pdtb.tedm concession +Expansion.Alternative ita.pdtb.luna alternative +QAP por.pdtb.crpc qap +QAP.Hypophora por.pdtb.crpc qap.hypophora +manner-means eng.dep.scidtb expansion.manner +joint-list zho.rst.gcdt joint +continuation fra.sdrt.annodis expansion.conjunction +alternation fra.sdrt.annodis alternative +purpose por.rst.cstn contingency.purpose +conjuntion zho.rst.sctb temporal +contrast nld.rst.nldt comparison +Hypophora por.pdtb.tedm qap +Expansion zho.pdtb.cdtb explanation-justify +Contrast zho.pdtb.cdtb adversative-antithesis +Causation zho.pdtb.cdtb contingency.cause +Expansion zho.pdtb.cdtb bg-general +Temporal zho.pdtb.cdtb 
temporal.synchrony +Expansion.Level-of-detail eng.pdtb.tedm expansion.restatement +Hypophora eng.pdtb.tedm qap +causal eng.rst.gum causal-cause +elaboration eng.rst.gum elab-example +joint eng.rst.gum joint-disjunction +organization eng.rst.gum organization-preparation +FINDINGS eng.dep.covdtb exp-evidence +joint eng.dep.scidtb joint-disjunction +condition eng.dep.scidtb contingency.condition +attribution eng.rst.gum attribution-positive \ No newline at end of file diff --git a/pytorch_classifier.py b/pytorch_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..33cfa6170ba1e7841d3d93203056f88af13e2148 --- /dev/null +++ b/pytorch_classifier.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +# coding: utf-8 + +import torch +import numpy as np +from transformers import AutoModel, AutoTokenizer, get_linear_schedule_with_warmup, set_seed +from torch import nn +from torch.optim import AdamW +from torch.utils.data import DataLoader +import torch.nn.functional as F +from torch.autograd import Variable +from tqdm import tqdm +import os +from time import sleep +from datetime import datetime +import sys +from sklearn.metrics import classification_report, accuracy_score +from configure import parse_args +from utils import * + +args = parse_args() +now = datetime.now() +dt_string = now.strftime("%d.%m.%y-%H:%M:%S") +layers_to_freeze = args.freeze_layers.split(";") +substitutions_file = 'mappings/substitutions.txt' +mapping_classes = args.mappings_file[:-4].split('-')[-1] +# specific_results = open_specific_results('mappings/specific_results.txt')['B'] +set_seed(42) + +# =============== +# Dataset class +# =============== + +class Dataset(torch.utils.data.Dataset): + + def __init__(self, sentences): + + self.labels = [sent[-1] for sent in sentences] + self.texts = [tokenizer(sent[-2], + is_split_into_words=True, + padding='max_length', + max_length = 512, + truncation=True, + return_tensors="pt") + for sent in sentences] + + def classes(self): + return self.labels + + def __len__(self): + return len(self.labels) + + def get_batch_labels(self, idx): + # Fetch a batch of labels + return np.array(self.labels[idx]) + + def get_batch_texts(self, idx): + # Fetch a batch of inputs + return self.texts[idx] + + def __getitem__(self, idx): + + batch_texts = self.get_batch_texts(idx) + batch_y = self.get_batch_labels(idx) + + return batch_texts, batch_y + +# =============== +# Load datasets +# =============== + +# Open mappings +mappings, inv_mappings = open_mappings(args.mappings_file) +batch_size = args.batch_size +tokenizer = AutoTokenizer.from_pretrained(args.transformer_model) + +train_sentences, dev_dict_sentences, test_dict_sentences = open_sentences(args.data_path, mappings) + +# Determine linear size (= number of classes in the sets + 1) +num_labels = len(set(sent[-1] for sent in train_sentences)) + 1 + +# make train/dev datasets +train_dataset = Dataset(train_sentences) +dev_dataset = {corpus: Dataset(s) for corpus, s in dev_dict_sentences.items()} +test_dataset = {corpus: Dataset(s) for corpus, s in test_dict_sentences.items()} + +# Make dasets with batches and dataloader +train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True) +dev_dict_dataloader = {corpus: DataLoader(dev_data, batch_size) + for corpus, dev_data in dev_dataset.items()} +test_dict_dataloader = {corpus: DataLoader(test_data, batch_size) + for corpus, test_data in test_dataset.items()} + + +# =============== +# Model setup +# =============== + +class TransformerClassifier(nn.Module): + + def 
__init__(self, dropout=args.dropout): + + super(TransformerClassifier, self).__init__() + + self.tr_model = AutoModel.from_pretrained(args.transformer_model) + self.dropout = nn.Dropout(dropout) + self.linear = nn.Linear(768, num_labels) # bert input x num of classes + self.relu = nn.ReLU() + + def forward(self, input_id, mask): + + outputs = self.tr_model(input_ids = input_id, + attention_mask = mask, + return_dict = True)['last_hidden_state'][:, 0, :] + dropout_output = self.dropout(outputs) + linear_output = self.linear(dropout_output) + final_layer = self.relu(linear_output) + + return final_layer + + +model = TransformerClassifier() + + +def train(model, + train_dataloader, + dev_dict_dataloader, + test_dict_sentences, + test_dict_dataloader, + epochs, + #specific_results + ): + + device = torch.device("cuda" if args.use_cuda else "cpu") + + criterion = nn.CrossEntropyLoss() + optimizer = AdamW(model.parameters(), #Adam + lr = 2e-5, #1e-6 + eps = 1e-8 + ) + + if args.use_cuda: + model = model.cuda() + criterion = criterion.cuda() + + gradient_accumulation_steps = args.gradient_accumulation_steps + total_steps = len(train_dataloader) * epochs + scheduler = get_linear_schedule_with_warmup(optimizer, + num_warmup_steps = 0, + num_training_steps = total_steps) + + seed_val = 42 + torch.manual_seed(seed_val) + torch.cuda.manual_seed_all(seed_val) + + # freeze layers, see argument in configure.py + if args.freeze_layers != '': + for name, param in model.named_parameters(): + if any(x in name for x in layers_to_freeze): + param.requires_grad = False + + for epoch_num in range(0, epochs): + print('\n=== Epoch {:} / {:} ==='.format(epoch_num + 1, epochs)) + + model.train() + + total_acc_train = 0 + total_loss_train = 0 + batch_counter = 0 + +# for train_input, train_label in tqdm(train_dataloader): + for train_input, train_label in train_dataloader: + batch_counter += 1 + train_label = train_label.to(device) + mask = train_input['attention_mask'].to(device) + input_id = train_input['input_ids'].squeeze(1).to(device) + + output = model(input_id, mask) + +# batch_loss = criterion(output, train_label.long()) +# total_loss_train += batch_loss.item() + +# acc = (output.argmax(dim=1) == train_label).sum().item() +# total_acc_train += acc + + # Compute Loss and Perform Back-propagation + loss = criterion(output, train_label.long()) + + + # Normalize the Gradients + loss = loss / gradient_accumulation_steps + loss.backward() + + + if (batch_counter % gradient_accumulation_steps == 0): + # Update Optimizer + optimizer.step() # or flip them? + optimizer.zero_grad() + + model.zero_grad() +# loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) +# optimizer.step() + scheduler.step() + + # ------ Validation -------- + + print('\nValidation for epoch:', epoch_num + 1) + + # Dev and test results for each corpus. We don't need to save the results. 
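+        # Note: despite the comment above, the predictions for every dev and test
+        # corpus are written to disk below (one folder per epoch under results/dev/
+        # and results/test/) so that per-epoch outputs can be compared afterwards.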
+ for corpus in dev_dict_dataloader: + dev_results = get_predictions(model, + corpus, + dev_dict_dataloader[corpus]) + + path_results = 'results/dev/pytorch_' + mapping_classes + '_' + str(epoch_num+1) + if not os.path.exists(path_results): + os.makedirs(path_results) + + print_results_to_file(corpus, + dev_dict_sentences[corpus], + dev_results, + inv_mappings, #substitutions_file, + path_results) + + # ------ Test -------- + + print('\nTest results for epoch:', epoch_num + 1) + + for corpus in test_dict_dataloader: + test_results = get_predictions(model, + corpus, + test_dict_dataloader[corpus]) + + path_results = 'results/test/pytorch_' + mapping_classes + '_' + str(epoch_num+1) + if not os.path.exists(path_results): + os.makedirs(path_results) + + print_results_to_file(corpus, + test_dict_sentences[corpus], + test_results, + inv_mappings, #substitutions_file, + path_results) + + +# # we want the results of specific epochs for specific corpora. +# # we define the epochs and the corpora and we save only these results. + +# if epoch_num+1 in specific_results: +# for corpus in specific_results[epoch_num+1]: +# test_results = get_predictions(model, +# corpus, +# test_dict_dataloader[corpus], +# print_results=False) + + + # ========= New Code! ============= + # Save for each epoch the dev and test results + + + + +# ------- Start the training ------- + +print('\nModel: ', args.transformer_model) +print('Batch size: ', args.batch_size * args.gradient_accumulation_steps) +print('\nStart training...\n') +train(model, + train_dataloader, + dev_dict_dataloader, + test_dict_sentences, + test_dict_dataloader, + args.num_epochs, +# specific_results + ) +print('\nTraining Done!') + + +# ------- Testing --------- + +# print('Testing...') +# for corpus in test_dict_dataloader: +# test_results = get_predictions(model, +# corpus, +# test_dict_dataloader[corpus] +# ) +# print_results_to_file(corpus, +# test_dict_sentences[corpus], +# test_results, +# inv_mappings, +# substitutions_file) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..396a0f04275209d4371e47b3b7fc448f1de217b3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,44 @@ +adapter-transformers==3.0.1 +certifi==2023.5.7 +charset-normalizer +cmake==3.26.3 +datasets==2.4.0 +fsspec +huggingface-hub==0.14.1 +idna==3.4 +Jinja2==3.1.2 +joblib==1.2.0 +lit==16.0.3 +MarkupSafe==2.1.2 +mpmath==1.3.0 +multidict==6.0.4 +multiprocess==0.70.13 +networkx==3.1 +packaging==23.1 +pandas==2.0.1 +Pillow==9.5.0 +pyarrow==12.0.0 +python-dateutil==2.8.2 +pytz==2023.3 +PyYAML==6.0 +regex==2023.5.5 +requests==2.30.0 +responses==0.18.0 +sacremoses==0.0.53 +scikit-learn==1.2.2 +scipy==1.10.1 +six==1.16.0 +sympy==1.12 +threadpoolctl==3.1.0 +tokenizers==0.12.1 +torch==2.0.1 +torchaudio==2.0.2 +torchvision +tqdm==4.65.0 +transformers==4.18.0 +triton==2.0.0 +typing_extensions==4.5.0 +tzdata==2023.3 +urllib3==2.0.2 +xxhash==3.2.0 +yarl==1.9.2 \ No newline at end of file diff --git a/run_stuff.sh b/run_stuff.sh new file mode 100644 index 0000000000000000000000000000000000000000..d08fa1b2ea48c6cc2e5b24f371a3c35c525b47f4 --- /dev/null +++ b/run_stuff.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +#SBATCH --job-name=adapters + +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=4 +#SBATCH --partition=RTX6000Node +#SBATCH --gres=gpu:1 + + +# tests tests + +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 pytorch_classifier.py --batch_size 8 --num_epochs 10 --data_path 
'/users/melodi/emetheni/clean_data' --mappings_file 'mappings/mappings-classes-braud.tsv' + +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 huggingface_classifier.py --batch_size 4 --gradient_accumulation_steps 32 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --mappings_file 'mappings/mappings-classes-braud.tsv' + +# Train the adapter: +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1;layer.2;layer.3' --mappings_file 'mappings/mappings-classes-braud.tsv' + +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1;layer.2;layer.3;layer.4' --mappings_file 'mappings/mappings-classes-braud.tsv' + +srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 make_adapter.py --batch_size 8 --num_epochs 15 --data_path '/users/melodi/emetheni/sharedtask2023/data' --freeze_layers 'layer.1' --mappings_file 'mappings/mappings-classes-braud.tsv' + +# Run classifier with adapter for corpora: + + +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 2 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 3 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 4 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 5 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 6 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1-2-3' + + +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 1 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 2 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 3 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 4 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1' +# srun singularity 
exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 5 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1' +# srun singularity exec /logiciels/containerCollections/CUDA10/pytorch.sif python3 adapter_classifier.py --batch_size 8 --num_epochs 6 --data_path '/users/melodi/emetheni/clean_data' --adapter_name 'A_15-epochs_frozen-1' diff --git a/utils.py b/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cd8220c5c7a4d8407813e978ab74e701734d2d1f --- /dev/null +++ b/utils.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python +# coding: utf-8 + +import os +import torch +from transformers import AutoConfig, AutoTokenizer +from configure import parse_args +import numpy as np +from sklearn.metrics import accuracy_score + +args = parse_args() + + +def open_mappings(mappings_file): + + ''' Open the mappings file into a dictionary.''' + + mappings = {} + with open(mappings_file, 'r') as f: + next(f) + for l in f: + mappings[l.split('\t')[0]] = int(l.strip().split('\t')[-1]) + inv_mappings = {v:k for k, v in mappings.items()} + + return mappings, inv_mappings + + +def open_file(filename, mappings_dict): + + ''' Function to open a .rels file. + Arguments: + - filename: the path to a .rels file + - mappings_dict: a dictionary of mappings of unique labels to integers + Returns a list of lists, where each list is: + the line + [two sentences combined with special BERT token, encoded label] + ''' + + max_len = 254 # 512 (max bert len) / 2 (2 sents) -2 (special tokens) + lines = [] + SEP_token = '[SEP]' + + with open(filename, 'r', encoding='utf-8') as f: + next(f) + for line in f: + l = line.strip().split('\t') + + if len(l) > 1: + # chop the sentences to max_len if too long + sent_1 = l[3].split(' ') + sent_2 = l[4].split(' ') + + if len(sent_1) > max_len: + sent_1 = sent_1[:max_len] + if len(sent_2) > max_len: + sent_2 = sent_2[:max_len] + + # flip them if different direction + if args.normalize_direction == 'yes': + if l[9] == '1>2': + lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings_dict[l[11].lower()]]) + else: + lines.append(l + [sent_2 + [SEP_token] + sent_1, mappings_dict[l[11].lower()]]) + else: + lines.append(l + [sent_1 + [SEP_token] + sent_2, mappings[l[11].lower()]]) + + return lines + + +def encode_batch(batch): + + """ Encodes a batch of input data using the model tokenizer. + Works for a pandas DF column, instead of a list. + """ + tokenizer = AutoTokenizer.from_pretrained(args.transformer_model) + return tokenizer(batch["text"], + max_length=512, + truncation=True, + padding="max_length" + ) + +def open_sentences(path_to_corpora, mappings_dict): + ''' Opens all the corpora and the surprise corpora in train/dev/test sets. + Uses the open_file() function from utils. 
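+    Corpora that have no train split are skipped for training (see the
+    try/except below); their dev and test files are still loaded.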
+ Returns: + - list of sentences for TRAIN: all the corpora and surprise corpora together + - dict of sentences for DEV: each dev set categorized per corpus + - dict of sentences for TEST: each test set categorized per corpus + ''' + + corpora = [folder for folder in os.listdir(path_to_corpora) + if not any(i in folder for i in ['.md', 'DS_', 'utils', 'ipynb'])] + + # --------------------- + train_sentences = [] + dev_dict_sentences = {} + test_dict_sentences = {} + + for corpus in corpora: + + try: + # open normal files + train_file = ['/'.join([path_to_corpora, corpus, x]) + for x in os.listdir(path_to_corpora + '/' + corpus) + if 'train' in x and 'rels' in x][0] + train_sentences += open_file(train_file, mappings_dict) + except: # some of them don't have train + pass + + #open each test separately + dev_dict_sentences[corpus] = [] + dev_file = ['/'.join([path_to_corpora,corpus,x]) + for x in os.listdir(path_to_corpora + '/' + corpus) + if 'dev' in x and 'rels' in x][0] + dev_dict_sentences[corpus] += open_file(dev_file, mappings_dict) + + #open each test separately + test_dict_sentences[corpus] = [] + test_file = ['/'.join([path_to_corpora,corpus,x]) + for x in os.listdir(path_to_corpora + '/' + corpus) + if 'test' in x and 'rels' in x][0] + test_dict_sentences[corpus] += open_file(test_file, mappings_dict) + + + return train_sentences, dev_dict_sentences, test_dict_sentences + + +# =============== +# Testing functions +# =============== + +def get_predictions(model, + corpus, + test_dataloader, + print_results=True): + + ''' Function to get the model's predictions for one corpus' test set. + Can print accuracy using scikit-learn. + Also works with dev sets -- just don't save the outputs. + Returns: list of predictions that match test file's lines. + ''' + + device = torch.device("cuda" if args.use_cuda else "cpu") + + if args.use_cuda: + model = model.cuda() + + model.eval() + test_loss, test_accuracy = 0, 0 + + all_labels = [] + all_preds = [] + + with torch.no_grad(): + for test_input, test_label in test_dataloader: + + mask = test_input['attention_mask'].to(device) + input_id = test_input['input_ids'].squeeze(1).to(device) + output = model(input_id, mask) + + logits = output[0] + logits = logits.detach().cpu().numpy() + label_ids = test_label.to('cpu').numpy() + + all_labels += label_ids.tolist() + all_preds += output.argmax(dim=1).tolist() + + assert len(all_labels) == len(all_preds) + test_acc = round(accuracy_score(all_labels, all_preds), 4) + + if print_results: + print(corpus, '\tAccuracy:\t', test_acc) + + return all_preds + + +def get_predictions_huggingface(trainer, + corpus, + test_set, + print_results=True): + + ''' SPECIFI FUNCTION FOR THE HUGGINGFACE TRAINER. + Function to get the model's predictions for one corpus' test set. + Can print accuracy using scikit-learn. + Also works with dev sets -- just don't save the outputs. + Returns: list of predictions that match test file's lines. + ''' + + results = trainer.predict(test_set) + preds = np.argmax(results.predictions, axis=1) + results = results.label_ids + test_acc = round(accuracy_score(preds, results), 4) + + if print_results: + print(corpus, '\tAccuracy:\t', test_acc, '\n') + + return preds + + +def print_results_to_file(corpus, + test_sentences, + test_results, + inv_mappings_dict, + #substitutions_file, + output_folder): + + ''' Function to print a new file with the test predictions per + the specifications of the Shared task. + Returns: one file per corpus with predictions. 
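+    Each output row repeats the fields of the corresponding input line and
+    appends the predicted label, decoded back to its string form with
+    inv_mappings_dict.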
+ ''' +# output_folder = 'results' + header = '\t'.join(['doc', + 'unit1_toks', + 'unit2_toks', + 'unit1_txt', + 'unit2_txt', + 's1_toks', + 's2_toks', + 'unit1_sent', + 'unit2_sent', + 'dir', + 'orig_label', + 'label', + 'predicted_label']) + +# # create a dict of all the substitutions that were made +# revert_substitutions = {} +# with open(substitutions_file, 'r', encoding='utf-8') as f: +# next(f) +# for line in f: +# l = line.strip().split('\t') +# if not l[1] in revert_substitutions: +# revert_substitutions[l[1]] = {} +# revert_substitutions[l[1]][l[2]] = l[0] + + # save the results in a separate folder, one file per corpus + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + results_to_write = [] + + for n, sent in enumerate(test_sentences): + label = test_results[n] + label = inv_mappings_dict[label] +# try: +# if corpus in revert_substitutions: +# if label in revert_substitutions[corpus]: +# label = revert_substitutions[corpus][label] +# except: +# pass + temp = sent[:] + [label] + results_to_write.append(temp) + + assert len(results_to_write) == len(test_sentences) + + with open(output_folder + '/' + corpus + '.tsv', 'a+', encoding='utf-8') as f: + f.write(header + '\n') + for line in results_to_write: + f.write('\t'.join([str(x) for x in line])) + f.write('\n') \ No newline at end of file
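
As a quick sanity check of the data pipeline in `utils.py`, the sketch below loads a mappings file and the shared-task corpora and prints a few statistics. It is a minimal, illustrative sketch rather than part of the submission: it assumes that `configure.parse_args()` supplies usable defaults when run without command-line flags (utils.py parses arguments at import time), and the mappings file and data folder paths are placeholders to adapt to your own setup.

```
# sanity_check.py -- illustrative sketch only; paths below are placeholders
# assumes configure.parse_args() provides defaults when no CLI flags are given
from utils import open_mappings, open_sentences

# label <-> integer mappings used for training (file name as used in run_stuff.sh)
mappings, inv_mappings = open_mappings('mappings/mappings-classes-braud.tsv')

# train is one flat list over all corpora; dev/test are dicts keyed by corpus name
train, dev_dict, test_dict = open_sentences('sharedtask2023/data', mappings)

print('training examples:', len(train))
for corpus, sents in dev_dict.items():
    print(corpus, 'dev size:', len(sents))

# each item ends with [tokens of both units joined by [SEP], encoded label]
tokens, label = train[0][-2], train[0][-1]
print(inv_mappings[label], ' '.join(tokens[:15]), '...')
```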