Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
discut22
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MELODI
AnDiAMO
discourseSegmentation
discut22
Commits
068f5571
Commit
068f5571
authored
2 years ago
by
laura.riviere
Browse files
Options
Downloads
Patches
Plain Diff
add fine-tuning
parent
237a227a
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
code/config_global_4.json
+49
-0
49 additions, 0 deletions
code/config_global_4.json
code/discut22_2.py
+145
-74
145 additions, 74 deletions
code/discut22_2.py
code/utils/training_allennlp.py
+1
-0
1 addition, 0 deletions
code/utils/training_allennlp.py
with
195 additions
and
74 deletions
code/config_global_4.json
0 → 100644
+
49
−
0
View file @
068f5571
{
"usecase_description"
:
"Config file for usecase_4 : from a dataset, splited in train/dev/test, fine-tune a model (= made of fine-tune of a LM) and test on testset."
,
"data_raw"
:
{
"name"
:
"eng.rst.rstdt"
,
"exte"
:
".conllu"
,
"language"
:
"en"
,
"existing_metadata"
:
true
},
"steps"
:{
"main"
:
"fine_tune"
,
"pre-processing"
:
{
"to_do"
:
false
,
"syntactic_tool"
:
"stanza"
,
"sentence_split"
:
true
,
"tokenization"
:
true
,
"syntactic_parsing"
:
true
,
"create_metadata"
:
{
"to_do"
:
false
,
"line"
:
"paragraph"
,
"sent"
:
"sent"
}
},
"discourse_segmenter"
:
{
"model"
:
"/home/lriviere/andiamo/discut22/data/eng.rst.rstdt/results_lundi9/model.tar.gz"
,
"training"
:
{
"toolkit"
:
"allennlp"
,
"pre_trained_lm"
:
"bert"
,
"config_file"
:
"/home/lriviere/andiamo/discut22/model/config_training_bert_m.jsonnet"
,
"train_data_path"
:
"/home/lriviere/andiamo/discut22/data/eng.sdrt.stac/eng.sdrt.stac_train.conllu"
,
"validation_data_path"
:
"/home/lriviere/andiamo/discut22/data/eng.sdrt.stac/eng.sdrt.stac_dev.conllu"
}
},
"gold_test_data_path"
:
"eng.rst.rstdt_test"
},
"output"
:{
"conll_file"
:{
"to_do"
:
true
,
"metadata"
:
true
,
"with_gold_labels"
:
true
},
"txt_file"
:{
"to_do"
:
true
,
"metadata"
:
true
}
}
}
This diff is collapsed.
Click to expand it.
code/discut22_2.py
+
145
−
74
View file @
068f5571
...
@@ -20,6 +20,7 @@ import utils.conll2bracket as c2bracket
...
@@ -20,6 +20,7 @@ import utils.conll2bracket as c2bracket
import
utils.seg_eval
as
seg_eval
import
utils.seg_eval
as
seg_eval
class
Data
:
class
Data
:
def
__init__
(
self
,
infos
,
stamp
):
def
__init__
(
self
,
infos
,
stamp
):
...
@@ -33,17 +34,21 @@ class Data:
...
@@ -33,17 +34,21 @@ class Data:
self
.
resu
=
f
"
{
self
.
path
}
/results_
{
stamp
}
"
self
.
resu
=
f
"
{
self
.
path
}
/results_
{
stamp
}
"
self
.
meta
=
infos
[
'
existing_metadata
'
]
self
.
meta
=
infos
[
'
existing_metadata
'
]
def
create_folders
(
self
):
# -> can be rtansfor into method of class
def
create_folders
(
self
,
ft
=
None
):
# -> can be rtansfor into method of class
for
it
in
[
self
.
conv
,
self
.
resu
]:
folders_list
=
[
self
.
conv
,
self
.
resu
]
#folders_list=[self.conv]
if
ft
!=
None
:
self
.
fine
=
f
"
{
self
.
resu
}
/fine_tune_
{
ft
}
"
#folders_list.append(self.fine) # made automatically by allennlp
for
it
in
folders_list
:
print
(
f
"
----> Checking/creating folder
{
it
}
.
"
)
print
(
f
"
----> Checking/creating folder
{
it
}
.
"
)
if
not
os
.
path
.
isdir
(
it
):
if
not
os
.
path
.
isdir
(
it
):
os
.
mkdir
(
it
)
os
.
mkdir
(
it
)
my_logs
[
'
folders
'
]
=
f
"
{
self
.
conv
}
,
{
self
.
resu
}
"
def
pre_processing
(
self
,
steps
):
def
pre_processing
(
self
,
steps
,
file_in
=
None
):
print
(
"
----> Preprocessing input data.
"
)
file_in
=
self
.
raw
if
file_in
==
None
else
file_in
file_in
=
self
.
raw
if
steps
.
pre_process_to_do
==
True
:
if
steps
.
pre_process_to_do
==
True
:
print
(
f
"
----> Preprocessing
{
self
.
raw
}
.
"
)
file_out
=
f
"
{
self
.
conv
}
/
{
self
.
name
}
.conll
"
file_out
=
f
"
{
self
.
conv
}
/
{
self
.
name
}
.conll
"
if
steps
.
synt_tool
==
"
stanza
"
:
if
steps
.
synt_tool
==
"
stanza
"
:
processors
=
[]
processors
=
[]
...
@@ -65,7 +70,7 @@ class Data:
...
@@ -65,7 +70,7 @@ class Data:
exit
(
f
"
Exited. Not valid syntactic tool:
\"
{
steps
.
synt_tool
}
\"
. Options:
\"
stanza
\"
. Change your config file.
"
)
exit
(
f
"
Exited. Not valid syntactic tool:
\"
{
steps
.
synt_tool
}
\"
. Options:
\"
stanza
\"
. Change your config file.
"
)
else
:
else
:
file_out
=
file_in
file_out
=
file_in
my_
logs
[
'
data_preprocessed
'
]
=
file_out
logs
.
add_infos
(
'
data_preprocessed
'
,
file_out
)
self
.
preprocessed
=
file_out
self
.
preprocessed
=
file_out
def
make_ner_format
(
self
):
def
make_ner_format
(
self
):
...
@@ -78,72 +83,100 @@ class Data:
...
@@ -78,72 +83,100 @@ class Data:
self
.
ner
=
f
"
{
self
.
conv
}
/
{
self
.
name
}
.ner
"
self
.
ner
=
f
"
{
self
.
conv
}
/
{
self
.
name
}
.ner
"
print
(
f
"
----> Making NER format
{
self
.
ner
}
.
"
)
print
(
f
"
----> Making NER format
{
self
.
ner
}
.
"
)
conv_to_ner
.
main
(
self
.
preprocessed
,
self
.
ner
,
"
conll
"
)
# <-- TODO faire en relatif#TODO add same for train/dev/test for config train
conv_to_ner
.
main
(
self
.
preprocessed
,
self
.
ner
,
"
conll
"
)
# <-- TODO faire en relatif#TODO add same for train/dev/test for config train
my_logs
[
'
data_ner
'
]
=
self
.
ner
logs
.
add_infos
(
'
data_ner
'
,
self
.
ner
)
def
make_predictions
(
self
,
steps
):
def
make_predictions
(
self
,
steps
,
js_name
=
None
,
fi_ner
=
None
,
model
=
None
):
self
.
pred_json
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_pred.json
"
js_name
=
self
.
name
if
js_name
==
None
else
js_name
cmd
=
f
"
allennlp predict --use-dataset-reader --output-file
{
self
.
pred_json
}
{
steps
.
model_path
}
{
self
.
ner
}
&>
{
self
.
resu
}
/logs_predictions.txt
"
fi_ner
=
self
.
ner
if
fi_ner
==
None
else
fi_ner
model
=
steps
.
model_path
if
model
==
None
else
model
self
.
pred_json
=
f
"
{
self
.
resu
}
/
{
js_name
}
_pred.json
"
cmd
=
f
"
allennlp predict --use-dataset-reader --output-file
{
self
.
pred_json
}
{
model
}
{
fi_ner
}
&>
{
self
.
resu
}
/logs_predictions.txt
"
print
(
f
"
----> Making predictions:
{
cmd
}
.
"
)
print
(
f
"
----> Making predictions:
{
cmd
}
.
"
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
my_
logs
[
'
predictions_cmd
'
]
=
cmd
logs
.
add_infos
(
'
predictions_cmd
'
,
cmd
)
def
pred_json_to_conll_w_metadata_w_gold
(
self
,
name
=
None
):
# here and 3 below..sorry..factorsation TBD
def
pred_json_to_conll_w_metadata_w_gold
(
self
):
# here and 3 below..sorry..factorsation TBD
name
=
self
.
name
if
name
==
None
else
name
self
.
pred_conll_meta_gold
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_pred_meta_gold.conll
"
self
.
pred_conll_meta_gold
=
f
"
{
self
.
resu
}
/
{
name
}
_pred_meta_gold.conll
"
json_to_connl
.
js2conllNmetaNgold
(
self
.
pred_json
,
self
.
pred_conll_meta_gold
,
"
conll
"
,
self
.
preprocessed
)
json_to_connl
.
js2conllNmetaNgold
(
self
.
pred_json
,
self
.
pred_conll_meta_gold
,
"
conll
"
,
self
.
preprocessed
)
return
self
.
pred_conll_meta_gold
return
self
.
pred_conll_meta_gold
def
pred_json_to_conll_w_metadata
(
self
):
def
pred_json_to_conll_w_metadata
(
self
,
name
=
None
):
self
.
pred_meta_conll
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_pred_meta.conll
"
name
=
self
.
name
if
name
==
None
else
name
self
.
pred_meta_conll
=
f
"
{
self
.
resu
}
/
{
name
}
_pred_meta.conll
"
json_to_connl
.
js2conllNmeta
(
self
.
pred_json
,
self
.
pred_meta_conll
,
"
conll
"
,
self
.
preprocessed
)
json_to_connl
.
js2conllNmeta
(
self
.
pred_json
,
self
.
pred_meta_conll
,
"
conll
"
,
self
.
preprocessed
)
return
self
.
pred_meta_conll
return
self
.
pred_meta_conll
def
pred_json_to_conll_w_gold
(
self
):
def
pred_json_to_conll_w_gold
(
self
,
name
=
None
):
self
.
pred_conll_gold
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_pred_gold.conll
"
name
=
self
.
name
if
name
==
None
else
name
self
.
pred_conll_gold
=
f
"
{
self
.
resu
}
/
{
name
}
_pred_gold.conll
"
json_to_connl
.
js2conll
(
self
.
pred_json
,
self
.
pred_conll_gold
,
"
conll
"
)
json_to_connl
.
js2conll
(
self
.
pred_json
,
self
.
pred_conll_gold
,
"
conll
"
)
return
self
.
pred_conll_gold
return
self
.
pred_conll_gold
def
pred_json_to_conll
(
self
):
def
pred_json_to_conll
(
self
,
name
=
None
):
self
.
pred_conll
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_pred.conll
"
name
=
self
.
name
if
name
==
None
else
name
self
.
pred_conll
=
f
"
{
self
.
resu
}
/
{
name
}
_pred.conll
"
json_to_connl
.
js2conll
(
self
.
pred_json
,
self
.
pred_conll
,
"
conll
"
)
json_to_connl
.
js2conll
(
self
.
pred_json
,
self
.
pred_conll
,
"
conll
"
)
return
self
.
pred_conll
return
self
.
pred_conll
def
brackets_txt
(
self
):
def
brackets_txt
(
self
,
name
=
None
):
self
.
brack
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_brac.txt
"
name
=
self
.
name
if
name
==
None
else
name
self
.
brack
=
f
"
{
self
.
resu
}
/
{
name
}
_brac.txt
"
c2bracket
.
conll2brackets
(
self
.
pred_conll
,
self
.
brack
)
c2bracket
.
conll2brackets
(
self
.
pred_conll
,
self
.
brack
)
return
self
.
brack
def
brackets_txt_with_metadata
(
self
):
def
brackets_txt_with_metadata
(
self
,
name
=
None
):
self
.
brack_meta
=
f
"
{
self
.
resu
}
/
{
self
.
name
}
_brac_meta.txt
"
name
=
self
.
name
if
name
==
None
else
name
self
.
brack_meta
=
f
"
{
self
.
resu
}
/
{
name
}
_brac_meta.txt
"
c2bracket
.
conll2brackets_with_meta
(
self
.
pred_meta_conll
,
self
.
brack_meta
)
c2bracket
.
conll2brackets_with_meta
(
self
.
pred_meta_conll
,
self
.
brack_meta
)
return
self
.
brack_meta
def
evaluation
(
self
,
steps
,
prod
,
gold
=
None
,
name
=
None
,
model
=
None
):
def
evaluation
(
self
,
prod
):
self
.
basic_metrics
=
f
"
{
self
.
resu
}
/Evaluation_metrics.json
"
self
.
basic_metrics
=
f
"
{
self
.
resu
}
/Evaluation_metrics.json
"
if
self
.
exte
==
"
.conll
"
or
self
.
exte
==
"
.conllu
"
:
# get gold file
gold
=
self
.
preprocessed
if
gold
==
None
else
gold
gold
=
self
.
raw
name
=
self
.
name
if
name
==
None
else
name
else
:
model
=
steps
.
model_path
if
model
==
None
else
model
gold
=
self
.
preprocessed
if
prod
.
conll_todo
==
False
:
# get pred_file
if
prod
.
conll_todo
==
False
:
# get pred_file
to compute metrics with seg_eval
pred
=
self
.
pred_json_to_conll
()
pred
=
self
.
pred_json_to_conll
(
name
)
else
:
else
:
if
prod
.
conll_meta
==
True
:
if
prod
.
conll_meta
==
True
:
if
prod
.
conll_w_gold
==
True
:
if
prod
.
conll_w_gold
==
True
:
pred
=
self
.
pred_json_to_conll_w_metadata_w_gold
()
pred
=
self
.
pred_json_to_conll_w_metadata_w_gold
(
name
)
else
:
else
:
pred
=
self
.
pred_json_to_conll_w_metadata
()
pred
=
self
.
pred_json_to_conll_w_metadata
(
name
)
else
:
else
:
if
prod
.
conll_w_gold
==
True
:
if
prod
.
conll_w_gold
==
True
:
pred
=
self
.
pred_json_to_conll_w_gold
()
pred
=
self
.
pred_json_to_conll_w_gold
(
name
)
else
:
else
:
pred
=
self
.
pred_json_to_conll
()
pred
=
self
.
pred_json_to_conll
(
name
)
print
(
f
"
----> Predictions to file
{
pred
}
"
)
print
(
f
"
----> Predictions to file
{
pred
}
.
"
)
print
(
f
"
----> Evaluation scores to file
{
self
.
basic_metrics
}
"
)
print
(
f
"
----> Evaluation scores to file
{
self
.
basic_metrics
}
.
"
)
scores_dict
=
seg_eval
.
get_scores
(
gold
,
pred
)
scores_dict
=
seg_eval
.
get_scores
(
gold
,
pred
)
scores_dict
[
'
model
'
]
=
model
logs
.
add_infos
(
'
basic_metrics
'
,
scores_dict
)
logs
.
add_infos
(
'
output_conll_file
'
,
pred
)
with
open
(
self
.
basic_metrics
,
'
w
'
)
as
fo
:
with
open
(
self
.
basic_metrics
,
'
w
'
)
as
fo
:
json
.
dump
(
scores_dict
,
fo
)
json
.
dump
(
scores_dict
,
fo
,
indent
=
4
)
if
prod
.
txt_todo
==
True
:
if
prod
.
txt_meta
==
True
:
pred
=
f
"
{
self
.
resu
}
/
{
name
}
_pred_meta.conll
"
if
not
os
.
path
.
isfile
(
pred
):
self
.
pred_json_to_conll_w_metadata
(
name
)
pred_txt
=
self
.
brackets_txt_with_metadata
(
name
)
# os.system(f"rm {pred})
else
:
pred
=
f
"
{
self
.
resu
}
/
{
name
}
_pred.conll
"
if
not
os
.
path
.
isfile
(
pred
):
self
.
pred_json_to_conll
pred_txt
=
self
.
brackets_txt
(
name
)
# os.system(f"rm {pred})
print
(
f
"
----> Predictions to file
{
pred_txt
}
.
"
)
logs
.
add_infos
(
'
output_txt_file
'
,
pred_txt
)
class
Output
:
class
Output
:
...
@@ -155,7 +188,6 @@ class Output:
...
@@ -155,7 +188,6 @@ class Output:
self
.
txt_meta
=
infos
[
'
txt_file
'
][
'
metadata
'
]
self
.
txt_meta
=
infos
[
'
txt_file
'
][
'
metadata
'
]
class
Process
:
class
Process
:
def
__init__
(
self
,
infos
):
def
__init__
(
self
,
infos
):
self
.
main
=
infos
[
"
main
"
]
# train test annotation
self
.
main
=
infos
[
"
main
"
]
# train test annotation
...
@@ -169,7 +201,7 @@ class Process:
...
@@ -169,7 +201,7 @@ class Process:
self
.
meta_line
=
infos
[
'
pre-processing
'
][
'
create_metadata
'
][
'
line
'
]
self
.
meta_line
=
infos
[
'
pre-processing
'
][
'
create_metadata
'
][
'
line
'
]
self
.
meta_sent
=
infos
[
'
pre-processing
'
][
'
create_metadata
'
][
'
sent
'
]
self
.
meta_sent
=
infos
[
'
pre-processing
'
][
'
create_metadata
'
][
'
sent
'
]
if
self
.
main
==
"
train
"
:
if
self
.
main
==
"
train
"
or
"
fine_tune
"
:
self
.
set_train
=
infos
[
'
discourse_segmenter
'
][
'
training
'
][
'
train_data_path
'
]
self
.
set_train
=
infos
[
'
discourse_segmenter
'
][
'
training
'
][
'
train_data_path
'
]
self
.
set_dev
=
infos
[
'
discourse_segmenter
'
][
'
training
'
][
'
validation_data_path
'
]
self
.
set_dev
=
infos
[
'
discourse_segmenter
'
][
'
training
'
][
'
validation_data_path
'
]
self
.
set_test
=
infos
[
'
gold_test_data_path
'
]
self
.
set_test
=
infos
[
'
gold_test_data_path
'
]
...
@@ -182,9 +214,8 @@ class Process:
...
@@ -182,9 +214,8 @@ class Process:
self
.
test_data
=
infos
[
'
gold_test_data_path
'
]
self
.
test_data
=
infos
[
'
gold_test_data_path
'
]
def
get_evaluation_status
(
self
):
def
get_evaluation_status
(
self
):
if
self
.
main
==
"
test
"
:
if
self
.
main
==
"
test
"
or
self
.
main
==
"
train
"
or
self
.
main
==
"
fine_tune
"
:
self
.
eval
=
True
self
.
eval
=
True
#elif self.main == "train":
def
get_model
(
self
):
def
get_model
(
self
):
self
.
model_path
=
""
self
.
model_path
=
""
...
@@ -200,27 +231,38 @@ class Process:
...
@@ -200,27 +231,38 @@ class Process:
else
:
else
:
self
.
model_path
=
self
.
model
self
.
model_path
=
self
.
model
def
get_data_sets
(
self
,
data
):
def
get_data_for_train
(
self
,
data
):
# from names get path to input
self
.
train_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_train
}{
data
.
exte
}
"
self
.
train_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_train
}{
data
.
exte
}
"
self
.
dev_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_dev
}{
data
.
exte
}
"
self
.
dev_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_dev
}{
data
.
exte
}
"
self
.
test_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_test
}{
data
.
exte
}
"
self
.
test_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_test
}{
data
.
exte
}
"
def
get_data_for_fine_tune
(
self
,
data
):
"""
spec: testset is the same that data_raw_name /
trainset & devset are elsewhere and config fill with path not just name
"""
self
.
ft_stamp
=
re
.
sub
(
'
^.*/
'
,
''
,
self
.
set_train
)
self
.
train_raw
=
self
.
set_train
self
.
dev_raw
=
self
.
set_dev
self
.
test_raw
=
f
"
{
data
.
path
}
/
{
self
.
set_test
}{
data
.
exte
}
"
# reset names to go ez pz for ner formatage
self
.
set_train
=
re
.
sub
(
'
\.[^\.]+$
'
,
''
,
self
.
ft_stamp
)
self
.
set_dev
=
re
.
sub
(
'
\.[^\.]+$
'
,
''
,
re
.
sub
(
'
^.*/
'
,
''
,
self
.
dev_raw
))
def
make_sets_ner_format
(
self
,
data
):
#[steps.set_train, steps.set_dev, steps.set_test]
def
make_sets_ner_format
(
self
,
data
):
#[steps.set_train, steps.set_dev, steps.set_test]
self
.
train_ner
=
f
"
{
data
.
conv
}
/
{
self
.
set_train
}{
data
.
exte
}
.ner
"
self
.
train_ner
=
f
"
{
data
.
conv
}
/
{
self
.
set_train
}{
data
.
exte
}
.ner
"
self
.
dev_ner
=
f
"
{
data
.
conv
}
/
{
self
.
set_dev
}{
data
.
exte
}
.ner
"
self
.
dev_ner
=
f
"
{
data
.
conv
}
/
{
self
.
set_dev
}{
data
.
exte
}
.ner
"
self
.
test_ner
=
f
"
{
data
.
conv
}
/
{
self
.
set_test
}{
data
.
exte
}
.ner
"
self
.
test_ner
=
f
"
{
data
.
conv
}
/
{
self
.
set_test
}{
data
.
exte
}
.ner
"
print
(
f
"
----> Making NER format
{
self
.
train_ner
}
.
"
)
print
(
f
"
----> Making NER format
{
self
.
train_ner
}
.
"
)
conv_to_ner
.
main
(
self
.
train_raw
,
self
.
train_ner
,
"
conll
"
)
conv_to_ner
.
main
(
self
.
train_raw
,
self
.
train_ner
,
"
conll
"
)
print
(
f
"
----> Making NER format
{
self
.
dev_ner
}
.
"
)
print
(
f
"
----> Making NER format
{
self
.
dev_ner
}
.
"
)
conv_to_ner
.
main
(
self
.
dev_raw
,
self
.
dev_ner
,
"
conll
"
)
conv_to_ner
.
main
(
self
.
dev_raw
,
self
.
dev_ner
,
"
conll
"
)
print
(
f
"
----> Making NER format
{
self
.
test_ner
}
.
"
)
print
(
f
"
----> Making NER format
{
self
.
test_ner
}
.
"
)
conv_to_ner
.
main
(
self
.
test_raw
,
self
.
test_ner
,
"
conll
"
)
conv_to_ner
.
main
(
self
.
test_raw
,
self
.
test_ner
,
"
conll
"
)
#self.ner = f"{self.preprocessed}.ner"
#self.ner = f"{self.conv}/{self.name}.ner"
#my_logs['data_ner'] = self.ner
def
update_training_config
(
self
):
def
update_training_config
(
self
):
logs
.
add_infos
(
'
training_config
'
,
self
.
tr_config
)
self
.
tr_config_updated
=
re
.
sub
(
'
.jsonnet$
'
,
'
_up.jsonnet
'
,
self
.
tr_config
)
self
.
tr_config_updated
=
re
.
sub
(
'
.jsonnet$
'
,
'
_up.jsonnet
'
,
self
.
tr_config
)
with
open
(
self
.
tr_config
,
'
r
'
)
as
js
:
with
open
(
self
.
tr_config
,
'
r
'
)
as
js
:
tr_conf
=
json
.
load
(
js
)
tr_conf
=
json
.
load
(
js
)
...
@@ -228,10 +270,23 @@ class Process:
...
@@ -228,10 +270,23 @@ class Process:
tr_conf
[
'
validation_data_path
'
]
=
self
.
dev_ner
tr_conf
[
'
validation_data_path
'
]
=
self
.
dev_ner
with
open
(
self
.
tr_config_updated
,
'
w
'
)
as
js
:
with
open
(
self
.
tr_config_updated
,
'
w
'
)
as
js
:
json
.
dump
(
tr_conf
,
js
)
json
.
dump
(
tr_conf
,
js
)
logs
.
add_infos
(
'
training_config_updated
'
,
self
.
tr_config_updated
)
def
training
(
self
,
data
):
def
training
(
self
,
data
):
cmd
=
f
"
allennlp train -s
{
data
.
resu
}
{
self
.
tr_config_updated
}
&>
{
data
.
resu
}
/logs_training.txt
"
#cmd = f"allennlp train -s {data.resu} -f {self.tr_config_updated} &> {data.resu}/logs_training.txt"
cmd
=
f
"
allennlp train -s
{
data
.
resu
}
{
self
.
tr_config_updated
}
"
# &> {data.resu}/logs_training.txt"
print
(
f
"
----> Training :
{
cmd
}
"
)
os
.
system
(
cmd
)
steps
.
model_path
=
f
"
{
data
.
resu
}
/model.tar.gz
"
logs
.
add_infos
(
'
model_to make predictions
'
,
self
.
model
)
def
fine_tuning
(
self
,
data
):
logs
.
add_infos
(
'
model_to be fine-tuned
'
,
self
.
model
)
cmd
=
f
"
allennlp fine-tune -m
{
self
.
model_path
}
-c
{
self
.
tr_config_updated
}
-s
{
data
.
fine
}
"
# &> {data.resu}/logs_fine-tuning.txt"
print
(
f
"
----> Fine-tuning :
{
cmd
}
"
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
self
.
model_ft_path
=
f
"
{
data
.
fine
}
/model.tar.gz
"
logs
.
add_infos
(
'
model_to make predictions
'
,
self
.
model_ft_path
)
def
get_stamp
():
def
get_stamp
():
...
@@ -245,20 +300,23 @@ def get_config_infos(config, stamp):
...
@@ -245,20 +300,23 @@ def get_config_infos(config, stamp):
data
=
Data
(
infos
[
'
data_raw
'
],
stamp
)
data
=
Data
(
infos
[
'
data_raw
'
],
stamp
)
steps
=
Process
(
infos
[
'
steps
'
])
steps
=
Process
(
infos
[
'
steps
'
])
prod
=
Output
(
infos
[
'
output
'
])
prod
=
Output
(
infos
[
'
output
'
])
my_logs
[
"
config
"
]
=
infos
return
data
,
steps
,
prod
return
data
,
steps
,
prod
def
print_logs
(
dict_logs
):
class
Logs
:
file_logs
=
f
"
{
data
.
resu
}
/logs_processes.json
"
def
__init__
(
self
):
with
open
(
file_logs
,
'
w
'
)
as
fi
:
self
.
file_path
=
f
"
{
data
.
resu
}
/logs_processes.json
"
json
.
dump
(
dict_logs
,
fi
,
indent
=
4
)
self
.
dict
=
{}
def
add_infos
(
self
,
key
,
value
):
self
.
dict
[
key
]
=
value
def
print
(
self
):
with
open
(
self
.
file_path
,
'
w
'
,
encoding
=
'
utf-8
'
)
as
fl
:
json
.
dump
(
self
.
dict
,
fl
,
indent
=
4
)
if
__name__
==
'
__main__
'
:
if
__name__
==
'
__main__
'
:
my_logs
=
{}
stamp
=
get_stamp
()
stamp
=
get_stamp
()
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'
--config
'
,
help
=
'
Config file in JSON.
'
)
parser
.
add_argument
(
'
--config
'
,
help
=
'
Config file in JSON.
'
)
...
@@ -266,32 +324,45 @@ if __name__ == '__main__':
...
@@ -266,32 +324,45 @@ if __name__ == '__main__':
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
config
=
args
.
config
config
=
args
.
config
stamp
=
args
.
name
stamp
=
args
.
name
my_logs
[
"
stamp
"
]
=
stamp
data
,
steps
,
prod
=
get_config_infos
(
config
,
stamp
)
data
,
steps
,
prod
=
get_config_infos
(
config
,
stamp
)
data
.
create_folders
()
data
.
create_folders
(
ft
=
None
)
logs
=
Logs
()
logs
.
add_infos
(
"
stamp
"
,
stamp
)
logs
.
add_infos
(
"
infos
"
,
config
)
if
steps
.
main
==
"
annotation
"
or
steps
.
main
==
"
test
"
:
if
steps
.
main
==
"
annotation
"
or
steps
.
main
==
"
test
"
:
data
.
pre_processing
(
steps
)
data
.
pre_processing
(
steps
)
data
.
make_ner_format
()
data
.
make_ner_format
()
steps
.
get_model
()
steps
.
get_model
()
data
.
make_predictions
(
steps
)
# output allennlp JSON
data
.
make_predictions
(
steps
)
# output allennlp JSON
steps
.
get_evaluation_status
()
if
steps
.
eval
==
True
:
data
.
evaluation
(
steps
,
prod
)
elif
steps
.
main
==
"
train
"
:
elif
steps
.
main
==
"
train
"
:
steps
.
get_data_
sets
(
data
)
#[steps.set_train, steps.set_dev, steps.set_test]
steps
.
get_data_
for_train
(
data
)
#[steps.set_train, steps.set_dev, steps.set_test]
#
data
preprocessing
data
.
pre
_
processing
(
steps
,
file_in
=
steps
.
test_raw
)
steps
.
make_sets_ner_format
(
data
)
steps
.
make_sets_ner_format
(
data
)
steps
.
update_training_config
()
steps
.
update_training_config
()
steps
.
training
(
data
)
steps
.
training
(
data
)
data
.
make_predictions
(
steps
,
js_name
=
steps
.
set_test
,
fi_ner
=
steps
.
test_ner
)
steps
.
get_evaluation_status
()
if
steps
.
eval
==
True
:
data
.
evaluation
(
steps
,
prod
,
name
=
steps
.
test_data
)
elif
steps
.
main
==
"
fine_tune
"
:
#steps.get_evaluation_status()
steps
.
get_data_for_fine_tune
(
data
)
#if steps.eval == True:
data
.
create_folders
(
steps
.
ft_stamp
)
#data.evaluation(prod)
data
.
pre_processing
(
steps
,
file_in
=
steps
.
test_raw
)
steps
.
make_sets_ner_format
(
data
)
steps
.
get_model
()
# model to be fine-tune
steps
.
update_training_config
()
steps
.
fine_tuning
(
data
)
data
.
make_predictions
(
steps
,
js_name
=
steps
.
set_test
,
fi_ner
=
steps
.
test_ner
,
model
=
steps
.
model_ft_path
)
steps
.
get_evaluation_status
()
if
steps
.
eval
==
True
:
data
.
evaluation
(
steps
,
prod
,
name
=
steps
.
test_data
,
model
=
steps
.
model_ft_path
)
print_logs
(
my_logs
)
# <-- attention variable globale !
logs
.
print
()
\ No newline at end of file
\ No newline at end of file
This diff is collapsed.
Click to expand it.
code/utils/training_allennlp.py
+
1
−
0
View file @
068f5571
...
@@ -50,6 +50,7 @@ def main(steps):
...
@@ -50,6 +50,7 @@ def main(steps):
# TODO:
# TODO:
#### train, has_par == true, en fait on fine_tune...
#### train, has_par == true, en fait on fine_tune...
#allennlp fine-tune -m Results_${CONFIG}/results_${PARENT}_${MODEL}/model.tar.gz -c ${CODE}configs/${MODEL}.jsonnet -s Results_${CONFIG}/results_${DATASET}-${PARENT}_${MODEL} --include-package allen_custom.custom_conll_reader --include-package allen_custom.custom_simple_tagger --include-package allen_custom.custom_disrpt_reader --include-package allen_custom.custom_bert_token_embedder
#allennlp fine-tune -m Results_${CONFIG}/results_${PARENT}_${MODEL}/model.tar.gz -c ${CODE}configs/${MODEL}.jsonnet -s Results_${CONFIG}/results_${DATASET}-${PARENT}_${MODEL} --include-package allen_custom.custom_conll_reader --include-package allen_custom.custom_simple_tagger --include-package allen_custom.custom_disrpt_reader --include-package allen_custom.custom_bert_token_embedder
# allennlp fine-tune -m MODEL_ARCHIVE -c CONFIG_FILE -s SERIALIZATION_DIR -o overrides
# TODO
# TODO
### ensuite prediction sur valset ou "parent test" ou "finetune test"... ??
### ensuite prediction sur valset ou "parent test" ou "finetune test"... ??
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment