diff --git a/Linker/Linker.py b/Linker/Linker.py
index 3cd6f300e59088758e3b32dc12ca08900f63293e..58197bd934263ff8bd159c922deb129b8b3522a2 100644
--- a/Linker/Linker.py
+++ b/Linker/Linker.py
@@ -56,6 +56,7 @@ def generate_square_subsequent_mask(sz):
 
 
 class Linker(Module):
+
     def __init__(self, supertagger_path_model):
         super(Linker, self).__init__()
 
diff --git a/NeuralProofNet/NeuralProofNet.py b/NeuralProofNet/NeuralProofNet.py
index 19e7df5be100da5e36d33d7a4d89004b0c5efb15..73ee6073994dc3523f91769a969e439845d64ca3 100644
--- a/NeuralProofNet/NeuralProofNet.py
+++ b/NeuralProofNet/NeuralProofNet.py
@@ -44,6 +44,7 @@ def output_create_dir():
 
 
 class NeuralProofNet(Module):
+
     def __init__(self, supertagger_path_model, linker_path_model=None):
         super(NeuralProofNet, self).__init__()
         config = Configuration.read_config()
diff --git a/README.md b/README.md
index a3075af424bbdd19953b8f4549a12d4aaa0d9c62..154f242044687cdda3b4f4f51b3567d84cbf2741 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,8 @@ This code was designed to work with the [DeepGrail Tagger](https://gitlab.irit.f
 [DeepGrail Linker](https://gitlab.irit.fr/pnria/global-helper/deepgrail-linker).
 
-In this version the tagger is not retrained with the linker.
+In this version the tagger is not retrained with the linker: the two models are trained separately, but at inference time the predictions of the tagger feed the inputs of the linker.
+
 
 ## Usage
 
@@ -19,11 +20,12 @@ Clone the project locally.
 
 Run the script init.sh
 
-Optional : Upload the tagger.pt and linker.pt in models. (You may need to modify 'model_tagger' in train.py.)
+Optional : upload the .pt files containing the model weights in the **models** directory.
 
 ### Structure
 
 The structure should look like this :
+
 ```
 .
 .
@@ -32,23 +34,30 @@ The structure should look like this :
 │   └── config.ini                # contains parameters
 ├── requirements.txt              # librairies needed
 ├── Datasets                      # TLGbank data with links
-├── SuperTagger                # The Supertagger directory (that you need to install)
-│   ├── ...
-│   └── SuperTagger  # Implementation of BertForTokenClassification
-│       ├── SuperTagger.py  # Main class
-│       └── Tagging_bert_model.py  # Bert model
+├── SuperTagger                   # The Supertagger directory (that you need to install)
+│   ├── Datasets                  # TLGbank data with supertags
+│   └── SuperTagger               # BertForTokenClassification
+│       ├── SuperTagger.py        # Main class
+│       └── Utils
+│           ├── Tagging_bert_model.py  # Bert model
+│           ├── SymbolTokenizer        # Tags tokenizer
+│           ├── SentencesTokenizer     # Words tokenizer
+│           └── helpers                # utils
 ├── Linker                    # The Linker directory (that you need to install)
 │   ├── ...
 │   └── Linker.py  # Linker class containing the neural network
 ├── NeuralProofNet             # The NeuralProofNet directory
-│   ├── ...
-│   └── NeuralProofNet.py  # NeuralProofNet class containing the linker and supertagger
+│   ├── utils_proofnet            # utils for NeuralProofNet
+│   └── NeuralProofNet.py         # NeuralProofNet class
 ├── models
-│   ├── linker.pt        # OPTIONAL : the pt file contaning the pretrained linker (you need to install it)
-│   └── supertagger.pt   # the pt file contaning the pretrained supertagger (you need to install it)
-├── Output                    # Directory where your linker models will be saved if checkpoint=True in train
-├── TensorBoard               # Directory where the stats will be saved if tensorboard=True in train
-└── train.py                  # Example of train
+│   ├── linker.pt                 # OPTIONAL : pretrained linker
+│   └── supertagger.pt            # pretrained supertagger
+├── Output                        # Directory with model backups made while training
+├── TensorBoard                   # Directory with training stats
+├── train_neuralproofnet.py       # trains the linker on top of the pretrained supertagger
+├── train_supertagger.py          # trains the supertagger
+├── predict_supertags.py          # supertag predictions
+└── predict_links.py              # link predictions
 ```
 
 
@@ -57,15 +66,72 @@ The structure should look like this :
 The sentences should be in a column "X", the links with '_x' postfix should be in a column "Y" and the categories in a column "Z".
 For the links each atom_x goes with the one and only other atom_x in the sentence.
 
+### Utils
+
+In order to load **m2_dataset.csv**, you can use `utils.read_csv_pgbar(...)`. This function returns a pandas
+dataframe.
+
+
 ## Training
 
+### Training of supertagger
+
+```
+df = read_csv_pgbar(file_path,1000)
+texts = df['X'].tolist()
+tags = df['Z'].tolist()
+
+# Dict to convert IDs to tokens (the dict is saved with the model for prediction)
+index_to_super = load_obj('Datasets/index_to_super')
+
+tagger = SuperTagger()
+
+bert_name = 'camembert-base'
+
+tagger.create_new_model(len(index_to_super), bert_name, index_to_super)
+# You can load your own model to re-train it
+# tagger.load_weights("your/model/path")
+
+tagger.train(texts, tags, checkpoint=True)
+
+pred_without_argmax, pred_convert, bert_hidden_state = tagger.predict(texts[7])
+```
+
+During training, if you use `checkpoint=True`, the model is automatically saved in a folder named Training_XX-XX_XX-XX. It saves
+a checkpoint after each epoch. Use `tensorboard=True` to log in the same folder (run `tensorboard --logdir=logs` to view the logs).
+
+`bert_name` can be any model available on [Hugging Face](https://huggingface.co/models)
+
+### Training of linker
+
 Launch train.py, if you look at it you can give another dataset file and another tagging model.
 
 In train, if you use `checkpoint=True`, the model is automatically saved in a folder: Training_XX-XX_XX-XX. It saves
 after each epoch. Use `tensorboard=True` for log in same folder. (`tensorboard --logdir=logs` for see logs)
 
+
 ## Predicting
 
+### Prediction of supertags
+
+To predict on your own data you need to load a model (saved with this code).
+
+```
+df = read_csv_pgbar(file_path,20)
+texts = df['X'].tolist()
+
+tagger = SuperTagger()
+
+tagger.load_weights("your/model/path")
+
+pred_without_argmax, pred_convert, bert_hidden_state = tagger.predict(texts[7])
+
+print(pred_convert)
+#['let', 'dr(0,s,s)', 'let', 'dr(0,dr(0,s,s),np)', 'dr(0,np,n)', 'dr(0,n,n)', 'let', 'n', 'let', 'dl(0,n,n)', 'dr(0,dl(0,dl(0,n,n),dl(0,n,n)),dl(0,n,n))', 'dl(0,n,n)', 'let', 'dr(0,np,np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,s),dl(0,np,s))', 'dr(0,dl(0,np,s),np)', 'dl(1,s,s)', 'np', 'dr(0,dl(0,np,np),n)', 'n', 'dl(0,s,txt)']
+```
+
+### Prediction of links
+
 For predict on your data you need to load a model (save with this code).
 
 ```
@@ -74,9 +140,9 @@ links = linker.predict_without_categories("le chat est noir")
 print(links)
 ```
 
-The file ```postprocessing.py``` will allow you to draw the prediction. (limited sentence length otherwise it will be confusing)
+The file ```postprocessing.py``` will allow you to draw the predictions with graphviz (which you need to install). Keep the sentences short, otherwise the graph will be hard to read.
 
-You can also use the function ```predict_without_categories``` which only needs the sentence.
+You can also use the function ```predict_without_categories```, which only needs the sentence (it uses the supertagger to predict the tags), or ```predict_with_categories```, which lets you give the categories directly (useful to check the links without bias from the supertagger).
 
 
 ## LICENSE
diff --git a/SuperTagger/README.md b/SuperTagger/README.md
index 8dc7c23c4a331a6e9b3a928beef29c00c4a2aa30..140b2b190aca3001a26fcb0369e64214d519a46e 100644
--- a/SuperTagger/README.md
+++ b/SuperTagger/README.md
@@ -6,117 +6,21 @@ part-of-speech taggers and supertaggers.
 This code was designed to work with the [DeepGrail Linker](https://gitlab.irit.fr/pnria/global-helper/deepgrail-linker)
 to provide a wide coverage syntactic and semantic parser for French. But the Tagger is independent, you can use it for
 your own tags.
 
-## Usage
 
-### Structure
+## Structure
 
 ```
 .
-├── Datasets                  # TLGbank data
-├── SuperTagger               # Implementation of BertForTokenClassification
-│   ├── SuperTagger.py        # Main class
-│   └── Tagging_bert_model.py # Bert model
-├── predict.py                # Example of prediction
-└── train.py                  # Example of train
+├── Datasets                  # TLGbank data
+└── SuperTagger               # BertForTokenClassification
+    ├── SuperTagger.py        # Main class
+    └── Utils
+        ├── Tagging_bert_model.py # Bert model
+        ├── SymbolTokenizer       # Tags tokenizer
+        ├── SentencesTokenizer    # Words tokenizer
+        └── helpers               # utils
 ```
 
-### Installation
-Python 3.9.10 **(Warning don't use Python 3.10**+**)**
-Clone the project locally. In a clean python venv do `pip install -r requirements.txt`
-Download already trained models or prepare data for **your** train.
-
-## How To use
-
-**predict.py** and **train.py** show simple examples of how to use the model, feel free to look at them before using the
-SupperTagger
-
-### Utils
-
-For load **m2_dataset.csv**, you can use `utils.read_csv_pgbar(...)`. This function return a pandas
-dataframe.
-
-### Prediction
-
-For predict on your data you need to load a model (save with this code).
- -``` -df = read_csv_pgbar(file_path,20) -texts = df['X'].tolist() - -tagger = SuperTagger() - -tagger.load_weights("your/model/path") - -pred_without_argmax, pred_convert, bert_hidden_state = tagger.predict(texts[7]) - -print(pred_convert) -#['let', 'dr(0,s,s)', 'let', 'dr(0,dr(0,s,s),np)', 'dr(0,np,n)', 'dr(0,n,n)', 'let', 'n', 'let', 'dl(0,n,n)', 'dr(0,dl(0,dl(0,n,n),dl(0,n,n)),dl(0,n,n))', 'dl(0,n,n)', 'let', 'dr(0,np,np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,s),dl(0,np,s))', 'dr(0,dl(0,np,s),np)', 'dl(1,s,s)', 'np', 'dr(0,dl(0,np,np),n)', 'n', 'dl(0,s,txt)'] -``` - -### Training - -``` -df = read_csv_pgbar(file_path,1000) -texts = df['X'].tolist() -tags = df['Z'].tolist() - -#Dict for convert ID to token (The dict is save with the model for prediction) -index_to_super = load_obj('Datasets/index_to_super') - -tagger = SuperTagger() - -bert_name = 'camembert-base' - -tagger.create_new_model(len(index_to_super), bert_name, index_to_super) -# You can load your model for re-train this -# tagger.load_weights("your/model/path") - -tagger.train(texts, tags, checkpoint=True) - -pred_without_argmax, pred_convert, bert_hidden_state = tagger.predict(texts[7]) -``` - -In train, if you use `checkpoint=True`, the model is automatically saved in a folder: Training_XX-XX_XX-XX. It saves -after each epoch. Use `tensorboard=True` for log in same folder. (`tensorboard --logdir=logs` for see logs) - -`bert_name` can be any model available on [Hugging Face](https://huggingface.co/models) - - -## LICENCE - -Copyright ou © ou Copr. CNRS, (18/07/2022) - -Contributeurs : -[Rabault Julien](https://www.linkedin.com/in/julienrabault), [de Pourtales Caroline](https://www.linkedin.com/in/caroline-de-pourtales/), Richard Moot - -Ce logiciel est un programme informatique servant à établir un Proof Net depuis une phrase française. - -Ce logiciel est régi par la licence CeCILL-C soumise au droit français et -respectant les principes de diffusion des logiciels libres. Vous pouvez -utiliser, modifier et/ou redistribuer ce programme sous les conditions -de la licence CeCILL-C telle que diffusée par le CEA, le CNRS et l'INRIA -sur le site "http://www.cecill.info". - -En contrepartie de l'accessibilité au code source et des droits de copie, -de modification et de redistribution accordés par cette licence, il n'est -offert aux utilisateurs qu'une garantie limitée. Pour les mêmes raisons, -seule une responsabilité restreinte pèse sur l'auteur du programme, le -titulaire des droits patrimoniaux et les concédants successifs. - -A cet égard l'attention de l'utilisateur est attirée sur les risques -associés au chargement, à l'utilisation, à la modification et/ou au -développement et à la reproduction du logiciel par l'utilisateur étant -donné sa spécificité de logiciel libre, qui peut le rendre complexe à -manipuler et qui le réserve donc à des développeurs et des professionnels -avertis possédant des connaissances informatiques approfondies. Les -utilisateurs sont donc invités à charger et tester l'adéquation du -logiciel à leurs besoins dans des conditions permettant d'assurer la -sécurité de leurs systèmes et ou de leurs données et, plus généralement, -à l'utiliser et l'exploiter dans les mêmes conditions de sécurité. - -Le fait que vous puissiez accéder à cet en-tête signifie que vous avez -pris connaissance de la licence CeCILL-C, et que vous en avez accepté les -termes. 
diff --git a/Utils/PostpreprocesTXT.py b/Utils/PostpreprocesTXT.py deleted file mode 100644 index a109e6a140f2a235c1a78b753d684f1c5fd18c4c..0000000000000000000000000000000000000000 --- a/Utils/PostpreprocesTXT.py +++ /dev/null @@ -1,163 +0,0 @@ -import itertools -import os -import re - -import numpy as np -import pandas as pd - - -# dr = / -# dl = \ -# -# def sub_tree_word(word_with_data: str): -# word = "" -# if not word_with_data.startswith("GOAL:"): -# s = word_with_data.split('|') -# word = s[0] -# tree = s[1] -# else: -# tree = word_with_data -# return word, tree -# -# -# def sub_tree_line(line_with_data: str): -# line_list = line_with_data.split() -# sentence = "" -# sub_trees = [] -# for word_with_data in line_list: -# w, t = sub_tree_word(word_with_data) -# sentence += ' ' + w -# if t not in ["\\", "/", "let"] and len(t) > 0: -# sub_trees.append([t]) -# """if ('ppp' in list(itertools.chain(*sub_trees))): -# print(sentence)""" -# return sentence, list(itertools.chain(*sub_trees)) -# -# -# def Txt_to_csv(file_name: str, result_name): -# file = open(file_name, "r", encoding="utf8") -# text = file.readlines() -# sub = [sub_tree_line(data) for data in text] -# df = pd.DataFrame(data=sub, columns=['X', 'Y']) -# df.to_csv("../Datasets/" + result_name + "_dataset_links.csv", mode='a', index=False, header=False) -# -# def Txt_to_csv_header(file_name: str, result_name): -# file = open(file_name, "r", encoding="utf8") -# text = file.readlines() -# sub = [sub_tree_line(data) for data in text] -# df = pd.DataFrame(data=sub, columns=['X', 'Y']) -# df.to_csv("../Datasets/" + result_name + "_dataset_links.csv", index=False) -def normalize_word(orig_word): - word = orig_word.lower() - if (word is "["): - word = "(" - if (word is "]"): - word = ")" - - return word - - -def read_maxentdata(path): - allwords = [] - allsuper = [] - for filename in os.listdir(path): - file = os.path.join(path, filename) - - with open(file, 'r', encoding="UTF8") as f: - superset = set() - words = "" - supertags = [] - for line in f: - line = line.strip().split() - length = len(line) - for l in range(length): - item = line[l].split('|') - if len(item) > 1: - orig_word = item[0] - word = normalize_word(orig_word) - supertag = item[1] - superset.add(supertag) - # words += ' ' +(str(orig_word)) - words += ' ' + (str(orig_word)) - supertags.append(supertag) - else: - supertag = line[l] - superset.add(supertag) - supertags.append(supertag) - allwords.append(words) - allsuper.append(supertags) - words = "" - supertags = [] - - X = np.asarray(allwords) - Z = np.asarray(allsuper) - return X, Z - -Xg,Zg = read_maxentdata("gold") -Xs,Zs= read_maxentdata("silver") -data3 = pd.read_csv('../SuperTagger/Datasets/m2_dataset.csv') - -dfs = pd.DataFrame(columns = ["X", "Y"]) -dfs['X'] = Xs -dfs['Y'] = Zs - -print(len(dfs['X'])) - -rs = pd.merge(dfs, data3, on="X",how="inner").reindex(dfs.index) -rs.drop('Y1', inplace=True, axis=1) -rs.drop('Y2', inplace=True, axis=1) -# rs.drop_duplicates() - -rs.to_csv("../Datasets/silver_dataset_links.csv", index=False) - -dfg = pd.DataFrame(columns = ["X", "Y"]) - -dfg['X'] = Xg -dfg['Y'] = Zg - -rg = pd.merge(dfg, data3, on="X",how="inner").reindex(dfg.index) -rg.drop('Y1', inplace=True, axis=1) -rg.drop('Y2', inplace=True, axis=1) -# rg.drop_duplicates() - -rg.to_csv("../Datasets/gold_dataset_links.csv", index=False) - -data1 = pd.read_csv('../Datasets/gold_dataset_links.csv') -data2 = pd.read_csv('../Datasets/silver_dataset_links.csv') -df = pd.merge(data1, data2,how='outer') -df = 
df.drop_duplicates(subset=['X'])
-
-
-#
-df[:len(df)-1].to_csv("../Datasets/goldANDsilver_dataset_links.csv", index=False)
-
-#
-# import os
-# i = 0
-# path = "gold"
-# for filename in os.listdir(path):
-#     if i == 0:
-#         Txt_to_csv_header(os.path.join(path, filename),path)
-#     else :
-#         Txt_to_csv(os.path.join(path, filename),path)
-#     i+=1
-#
-# i = 0
-# path = "silver"
-# for filename in os.listdir(path):
-#     if i == 0:
-#         Txt_to_csv_header(os.path.join(path, filename),path)
-#     else :
-#         Txt_to_csv(os.path.join(path, filename),path)
-#     i+=1
-#
-# # reading csv files
-# data1 = pd.read_csv('../Datasets/gold_dataset_links.csv')
-# data2 = pd.read_csv('../Datasets/silver_dataset_links.csv')
-# data3 = pd.read_csv('../SuperTagger/Datasets/m2_dataset.csv')
-#
-# # using merge function by setting how='left'
-# df = pd.merge(data1, data2,how='outer')
-
-#
-# df.to_csv("../Datasets/goldANDsilver_dataset_links.csv", index=False)
diff --git a/predict_links.py b/predict_links.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bc1db15b8965fe1ae6e2e3a98122920632f4872
--- /dev/null
+++ b/predict_links.py
@@ -0,0 +1,24 @@
+from NeuralProofNet.NeuralProofNet import NeuralProofNet
+
+# region data
+a_s = "( 1 ) parmi les huit \" partants \" acquis ou potentiels , MM. Lacombe , Koehler et Laroze ne sont pas membres " \
+      "du PCF . "
+tags_s = [['let', 'dr(0,s,s)', 'let', 'dr(0,dr(0,s,s),np)', 'dr(0,np,n)', 'dr(0,n,n)', 'let', 'n', 'let', 'dl(0,n,n)',
+           'dr(0,dl(0,dl(0,n,n),dl(0,n,n)),dl(0,n,n))', 'dl(0,n,n)', 'let', 'dr(0,np,np)', 'np', 'dr(0,dl(0,np,np),np)',
+           'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,s),dl(0,np,s))', 'dr(0,dl(0,np,s),np)', 'dl(1,s,s)', 'np',
+           'dr(0,dl(0,np,np),n)', 'n', 'dl(0,s,txt)']]
+# endregion
+
+
+# region model
+neuralproofnet = NeuralProofNet("models/flaubert_super_98_V2_50e.pt")
+model = "models/linker.pt"
+neuralproofnet.linker.load_weights(model)
+# endregion
+
+
+# region prediction
+linker = neuralproofnet.linker
+links = linker.predict_without_categories("le chat est noir")
+print(links)
+# endregion
\ No newline at end of file
diff --git a/predict_supertags.py b/predict_supertags.py
index fc42e14ab4ced39640cf7313f48b219da8ef4d55..22725ab9e9ff7ba48676bcbef51c76c55a95c4df 100644
--- a/predict_supertags.py
+++ b/predict_supertags.py
@@ -1,21 +1,24 @@
 from SuperTagger.SuperTagger.SuperTagger import SuperTagger
 from SuperTagger.SuperTagger.Utils.helpers import categorical_accuracy_str
 
-#### DATA ####
-
+# region data
 a_s = "( 1 ) parmi les huit \" partants \" acquis ou potentiels , MM. Lacombe , Koehler et Laroze ne sont pas membres " \
       "du PCF . 
" tags_s = [['let', 'dr(0,s,s)', 'let', 'dr(0,dr(0,s,s),np)', 'dr(0,np,n)', 'dr(0,n,n)', 'let', 'n', 'let', 'dl(0,n,n)', 'dr(0,dl(0,dl(0,n,n),dl(0,n,n)),dl(0,n,n))', 'dl(0,n,n)', 'let', 'dr(0,np,np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,np),np)', 'np', 'dr(0,dl(0,np,s),dl(0,np,s))', 'dr(0,dl(0,np,s),np)', 'dl(1,s,s)', 'np', 'dr(0,dl(0,np,np),n)', 'n', 'dl(0,s,txt)']] +# endregion + -#### MODEL #### +# region model tagger = SuperTagger() model = "models/flaubert_super_98_V2_50e.pt" tagger.load_weights(model) +# endregion + -#### TEST #### +# region prediction _, pred_convert = tagger.predict(a_s) print("Model : ", model) @@ -31,3 +34,4 @@ print() print("\tPred_convert : ", pred_convert[0]) print() print("\tScore :", categorical_accuracy_str(pred_convert, tags_s)) +# endregion \ No newline at end of file diff --git a/train.py b/train.py deleted file mode 100644 index e9b395aa5011f43e63fac4c4f11d6f83f3a79fa1..0000000000000000000000000000000000000000 --- a/train.py +++ /dev/null @@ -1,32 +0,0 @@ -import torch - -from Linker import * -from NeuralProofNet.NeuralProofNet import NeuralProofNet -from utils import read_csv_pgbar -from Configuration import Configuration - -torch.cuda.empty_cache() -nb_sentences = 10000000 -file_path_axiom_links = 'Datasets/goldANDsilver_dataset_links.csv' -model_tagger = "models/flaubert_super_98_V2_50e.pt" - -# region config -config = Configuration.read_config() -version = config["VERSION"] -datasetConfig = config["DATASET_PARAMS"] -modelEncoderConfig = config["MODEL_ENCODER"] -modelLinkerConfig = config["MODEL_LINKER"] -modelTrainingConfig = config["MODEL_TRAINING"] -epochs = int(modelTrainingConfig['epoch']) -batch_size = int(modelTrainingConfig['batch_size']) -# endregion - -df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences) - -print("#" * 20) -print("#" * 20) -neural_proof_net = NeuralProofNet(model_tagger) -neural_proof_net.train_neuralproofnet(df_axiom_links, validation_rate=0.1, epochs=epochs, batch_size=batch_size, - checkpoint=True, tensorboard=True) -print("#" * 20) -print("#" * 20) diff --git a/train_neuralproofnet.py b/train_neuralproofnet.py index 1137f11af10c5956f02713ecd17026e965da939c..8bbc0863b162b023ae529f57f58a84cce5a03152 100644 --- a/train_neuralproofnet.py +++ b/train_neuralproofnet.py @@ -16,15 +16,21 @@ epochs = int(modelTrainingConfig['epoch']) batch_size = int(modelTrainingConfig['batch_size']) # endregion + +# region data nb_sentences = 100 file_path_axiom_links = 'Datasets/goldANDsilver_dataset_links.csv' -model_tagger = "models/flaubert_super_98_V2_50e.pt" df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences) +# endregion + +# region model print("#" * 20) print("#" * 20) +model_tagger = "models/flaubert_super_98_V2_50e.pt" neural_proof_net = NeuralProofNet(model_tagger) neural_proof_net.train_neuralproofnet(df_axiom_links, validation_rate=0.1, epochs=epochs, batch_size=batch_size, checkpoint=True, tensorboard=True) print("#" * 20) print("#" * 20) +# endregion \ No newline at end of file diff --git a/train_supertagger.py b/train_supertagger.py index 0ee5a85eea57b01d1933370b847a5f318a87e28b..3c0bde0e6ddba0824f41588d8a222aa8751f432b 100644 --- a/train_supertagger.py +++ b/train_supertagger.py @@ -1,31 +1,24 @@ from SuperTagger.SuperTagger.SuperTagger import SuperTagger from SuperTagger.SuperTagger.Utils.helpers import read_csv_pgbar, load_obj -#### DATA #### +# region data file_path = 'SuperTagger/Datasets/m2_dataset_V2.csv' nb_sentences = 100 df = read_csv_pgbar(file_path,nb_sentences) - texts 
= df['X'].tolist()
 tags = df['Z'].tolist()
 
-test_s = texts[:4]
-tags_s = tags[:4]
-
 index_to_super = load_obj('SuperTagger/Datasets/index_to_super')
+# endregion
+
 
-#### MODEL ####
+# region model
 tagger = SuperTagger()
 
 tagger.create_new_model(len(index_to_super),'camembert-base',index_to_super)
+# If you want to load a pretrained model
 # tagger.load_weights("models/model_check.pt")
 
 tagger.train(texts, tags, epochs=2, batch_size=16, validation_rate=0.1, tensorboard=True, checkpoint=True)
-
-#### TEST ####
-pred = tagger.predict(test_s)
-
-print(test_s)
-print()
-print(pred)
+# endregion
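
For reference, the README changes above mention ```predict_with_categories``` but no example of it appears in this diff. The sketch below is illustrative only: it assumes the method takes the sentence together with its list of categories (the exact signature is not shown here), and the category list is a made-up placeholder rather than gold tags.

```
from NeuralProofNet.NeuralProofNet import NeuralProofNet

# Load the pretrained supertagger and linker weights (paths follow the other scripts in this diff)
neuralproofnet = NeuralProofNet("models/flaubert_super_98_V2_50e.pt")
neuralproofnet.linker.load_weights("models/linker.pt")
linker = neuralproofnet.linker

sentence = "le chat est noir"

# Prediction from the raw sentence: the supertagger predicts the categories first
links = linker.predict_without_categories(sentence)
print(links)

# Assumed usage of predict_with_categories: the sentence plus its categories, bypassing the supertagger.
# The categories below are illustrative placeholders, not gold tags for this sentence.
categories = [['dr(0,np,n)', 'n', 'dr(0,dl(0,np,s),dl(0,n,n))', 'dl(0,n,n)']]
links = linker.predict_with_categories(sentence, categories)
print(links)
```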