From df43ddab786f9c6438c24f9ee6842973ba25b498 Mon Sep 17 00:00:00 2001 From: Caroline DE POURTALES <cdepourt@montana.irit.fr> Date: Wed, 11 May 2022 15:28:20 +0200 Subject: [PATCH] correction on regex : confusion np/n --- .../Linker/__pycache__/Linker.cpython-38.pyc | Bin 5536 -> 5231 bytes .../Linker/__pycache__/utils.cpython-38.pyc | Bin 7029 -> 7037 bytes SuperTagger/Linker/utils.py | 4 ++-- SuperTagger/__pycache__/eval.cpython-38.pyc | Bin 1775 -> 1770 bytes train.py | 15 +++++++++++---- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/SuperTagger/Linker/__pycache__/Linker.cpython-38.pyc b/SuperTagger/Linker/__pycache__/Linker.cpython-38.pyc index d796c7a4452db32c4eb9d9237b80fc4bab334428..e732b7dd6a2e215b497d136ba944e5e832aeacd6 100644 GIT binary patch delta 510 zcmZ3W{a%AFl$V!_fq{X+YE5<0YVnPHlUW!|CNE;q=W1tQVTfW6X3*r^{Fo(}i7{%j zIs2K->YVD#j3JZ#d7RmAv6kc)C1*^Y&ZB5!#lXO@lC{W|fq|jOj)8$8iZ8JwKesqO zH7_|oB{MI*$Q~pk2~x#a<N#vJg9t|k28NK$UwE_`6D>h}8xSD}BH+?d3=9m#AQ!SR zurUfTiZSy1Eph^>a0U?`Ai@(wc!3C1T^tMy3~Ve+e^i)x{=Q_K+`uQoXgYZ+pEkD{ zNUb@DaG89XPff^`fq_Ajvk2rou<MKLCo}WM@q^6;8>z^^z~HdCfM1GH$q%H>8$|en zbTQsyPRY#G<hsRFl3(Nk5{{a@M&LeU#^xTuBt}(`YE7mhH;_aYNIe(WmRnpYi6x0S z`H3m1MMWu-g@p~dlR-){Kt$$be_=Vs^2vF^dWsxHN+2O+5TODh#6bkeIYl7jigG6( z5>`_Md6a{ZN0vi_gTF`}WR2o2&die3qLTdloZ=!TkRF%G{33yjiIWpWW-~@i{wuN= E0Bs~|bpQYW delta 825 zcmaE_u|S(Il$V!_fq{WxMMYK8E{TnNlUW#TCNE;q7fRtw;c8)sVsB?)VTj@gX3*r` ze48bhiSgECefBf;Odvf_%*?>Rz{<eD;LO0lP;AA(z)-@F#aP2w!jQ$3!r04H%ap=Y z!&Jk#fVqYti)A6B4MPpn0@f7fg^UZ>QkZKPv)EIZB^gqf7cvJkXtMZeGTvfMDXA<- zO@|uK@RE^%fkBhw7Hdg<QF6vD&iuU8`r`PU%<R-#>}fgqi6v$xx0q53i%b|87;bTv z<`ox~rlwY<-eN0CEl5o)xy4$Xkywzb$y_7}Qox;9lAl`~pOjdVoDn6MlA4^Kl3Em> zSW=P~pPN{mT_nfAzyKlSLCOVl6SGs}Vah-%92giFREya_u45En6kx29@X5@}PA$^I zP`UXRy9P63<YZ-TXVxNX1_p-7x!j62ZVU_zD_M&?LH2ktFfc^%L0puYmz<xHnU`MV z4Uz%5btPkw4~PwNNRclC14HEIQ{38&^{ya64-f%%EL=Js6ak>vWMg1s6k-%(<oR3V z2U6w_B7#9g2#5#;5vaO27#JAXSeX8(F!TH^0>$A=P^^ODBC{m5s8~~=$P5%3EIFCQ zC6h~dmAK78W>|m->&Xjw)r0~-al{D@;v$f*io7S^<c;G8+XuEvk%581b+a3v6r&Qz z{Gu?B-bj!x##_uOnYo%=x0p)u!NF9NGP#NWK4b3YB!MJGUXW@{rlLTQ`jW{P1;rWj zCchLk)XD`36oH6h5CIMfFafge7FTLnVorQ&L4I;Z&14rLJvGiEC6F>@5TODh#6bio y%@%>2R#XDwmQS86q{ambLJmeASq|382Zdx9tta0T3S`WlY#=<FF=_G{;l%*j8o!<Z diff --git a/SuperTagger/Linker/__pycache__/utils.cpython-38.pyc b/SuperTagger/Linker/__pycache__/utils.cpython-38.pyc index b13bf57006d2d3eb78625e40b86218f03e27798c..49d3c94ae3d2a9e58f9576ec4e25c73d5ea5aa19 100644 GIT binary patch delta 1667 zcmexr_ScLzl$V!_fq{YH&d%y2=Z(DPOk6e$3=HWEH4L#Pll_>a6jK<)89Ep{m?ar% z7$q4NGBq<6vD7e4WGZ9{W?0F1i!pw36H_4L%E{N6G8ttiJ2E>e^D;0ntYj_{WME(@ z5&{vzAOXfA5fEDpM2Jsb$~+s{A}<!HdVCfYv4OO4fCwHCApjyoK?KA+1_lO61_p*= zkP0>iHU=I>ng2x+AbF5*ku-=U10rNW1gahu1_lN;MyAR4SvFaN4A*43C6E(eke`!S zlvz?4pPQLiS`2YJs)ceO3&ENh*cgf=CZAzd6qaIOV7SGSlb@Fkwo{X-NPZHVC^wR= zlNH#i85JkbVN+ogo_vtaUlC-eCM#NK70H9#sRAOzCL6LlF&a)TVZUm^2@(KB*DcoE z#FFHUl}tsTzy@nx$#{z+Ik6-)J-?{32xMpx#G8|AIqJZcKo#F&PEIU=C_q*v!zsn9 z4RSqeQEGZ>MUm-bXHI)Yi^<KLI>y!@Yr%@mK`gMj_6!UR>Yxw+MFj(+7$XNG8<PMd zA0r2&0ArE;<g+|cyp{|M44O<uAf-imlRdfAr5r)}EkFd=I4}V=PIGb>mo%g1<Yip9 z<&8k{CLqELM1bu?s1%>PfV({kWHiJRRv<NCX)pmcO`L&&!4u>-P_kiTU}E54(Bv#) z2B`%3u?Q4|MW$firWfUx78HTfe330k-fnUukGQA<hzT|iY&+Oc&&g|f<QY9dK2ro) zugO#dvZF{3q}ByQfHi;#vB@61I*eYMOL^N_2>M4_s9hdpARhlrJ}4x??9RY2`J%9k zHfs2+WW2?hlbTi%U!0ke3NZrgTu>MkPR<eY*TGQFRg{^YfmK5}NP{w}h9W<>-{1z< zgVdpWEC5a2<Ygk>1U)7o8cKwHlR@^Ot3$Vsfq`KrNFBPn(A_^dLo7HJ&5u~jS`JD; zko?Vrl6-D)6_+Fyl@ym}mSjL;ADnzZ=?RvGB*c{%EhbxwtEf4F0>v3bfD(o#a}hZ8 z`-21mK?FERgD1C&XEO$G{wUtcs1H&Hjz>*s?&bp*pHW;nnZ+gXNuYcWmWJi;rIPk) zYM`9PUK9*65u6?sK`bQ@0rEkS4v3{YnM+Dm3tS?*F)%Pd4NNS_&n*VK>J}5MU<d&z z7n>X?Wg#05GFcdsV?f@##hO=|TTpq6DX*YNaq=>$R10vnF$C!VXLL;v3!E{$KrFD6 RK!!lv4EJ1-*kosEBLF4QB!vI~ delta 1691 zcmexs_SK9xl$V!_fq{WRZDw_n^+sNECN2vG28MKo8irVd$$m^ylXI9P85t&bG6k{} zu`w`AzRi@$BJuy1-sIiP%9FF1rImRY7#LPE7YQ&hFcb-b2qBOFW05e3EdnA$C$D6l zjbbc|)Z`o%IX`xgEH{YY2N7%_0-}R~fkBdifuWd#fq{XIfsKKOQR07*7)V|mM1bUq zq(Cfb5CPFM`3B2DYmjN0Ot%Db;tTR~5{oiRD&uoA^Gb^$&Ox<74rBuh)P^Fl$yZnv zjU^Zu7;drT<maUqfh^HvDv||>@FSU5kW*S*glw+{TQ#Hn<Rxq>j6#!7viU25Y|&&z z3lXq6MM_}jPPSxsV$`3!j!Sm(O!gfnAWs%?f~;Z9O)N>ySjkib3RJKGD;aNbBqx@n zrso$`g2JDn2<q8ZjyeyJMNq}Jn3EGrAPPXDP*n^J47wmsf`r%@m>4)1G}()oL2l3l zTUV5to?204JlUJmp3!V_H>ZxMB}fvayvP*90$XM~`3$ElqwVBZoKd{y3=9mKOhvXJ zNu9}oT<T`_Af_3J0BZviU~TFQ3=GB~+xbB0i;;zq1B?Y2c^He-C$HnWEpGtQWdtHj zKm^!EgnrS<E4bU`K^8*%1xl9?7P1;Y9w}dtXTSki1hVfIYkE<BX+e<{$V_VxVFU84 zAS@W;OY-9rOY(Dzi|jy>V8g&xgI(=Dc`J`RqdUm6iXgi+nTp&&%5*@46Nmt7023mU z19){9JvP_!wzClQjIvO>HON4S7qEJUfq@|h6ks63A<<RD2(r<Yfq`N2K|yDA)Bsz_ zc#AV9HLWDRI5Q;`Vl3DTqLclFgLN>}ausE!XJ9I3U|=Yp>>%Qg?g1Z=%^;UR)6!%c zkzjN+e&}k}iFgx?F$vL7WmLx%q1y%WFE%$}*!NEq!%gUBO)d}%jzzN%-5dr6hUK7u zhZF`(44RxsxrD2@B(bQZxID8YqX?V|zzGYK(uyFdYqEm45~JB<XK@uZ2T*`Gf(TH^ zX)+gqGlnlnz#l|_<0o)(uXr|N;O3v=t&I90b>O(ugywo)aPb+%m6KUq5}yPr55Ur} zSXe7*U#|kngzQCuAQQnZk_WLAKm^DKMOq-1Hb@cMEz#V>3P^H~&&-Q2PR%Px%}Y*& zl+-Q^3=B|<AQ=MW+FMMpf+PrJ49LZ|gwrzfQsN8pi{tZB)8i*+OR33*f{Yi2<T_A* z++xiu%`K?B#gtc2BtLnbRH_9yaP&cDfJ+NdaRA9m9w0HW<3WZ%d<PGPB9Y17(nbJ$ COg1$D diff --git a/SuperTagger/Linker/utils.py b/SuperTagger/Linker/utils.py index 3f8e892..e5708c1 100644 --- a/SuperTagger/Linker/utils.py +++ b/SuperTagger/Linker/utils.py @@ -21,9 +21,9 @@ def get_axiom_links(max_atoms_in_one_type, atoms_polarity, batch_axiom_links): for atom_type in list(atom_map.keys())[:-1]: # filtrer sur atom_batch que ce type puis filtrer avec les indices sur atom polarity l_polarity_plus = [[x for i, x in enumerate(atoms_batch[s_idx]) if atoms_polarity[s_idx, i] - and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))] + and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))] l_polarity_minus = [[x for i, x in enumerate(atoms_batch[s_idx]) if not atoms_polarity[s_idx, i] - and bool(re.search(atom_type, atoms_batch[s_idx][i]))] for s_idx in + and bool(re.search(atom_type+"_", atoms_batch[s_idx][i]))] for s_idx in range(len(atoms_batch))] linking_plus_to_minus = pad_sequence( diff --git a/SuperTagger/__pycache__/eval.cpython-38.pyc b/SuperTagger/__pycache__/eval.cpython-38.pyc index ec90d972078edc0b20a9275119656a5973b657f9..fce253cccfad91c0b80dd6cd68cc5e05e3437104 100644 GIT binary patch delta 45 zcmaFQ`-+z@l$V!_fq{X6e|2@zOO}m%IjjN#Nr@%N8Sy1WrK#~bnR(fp`&knh0V~}O AB>(^b delta 50 zcmaFG`<|CCl$V!_fq{YHT47ZZ6YEC499B`$oXouJ%)IpYf}GOg_>%ni+|0bv&0VYs Fi~w~i5DNeR diff --git a/train.py b/train.py index 05d223f..d4e4533 100644 --- a/train.py +++ b/train.py @@ -43,6 +43,8 @@ learning_rate = float(Configuration.modelTrainingConfig['learning_rate']) file_path_axiom_links = 'Datasets/aa1_links_dataset_links.csv' df_axiom_links = read_csv_pgbar(file_path_axiom_links, nb_sentences) +sentences_batch = df_axiom_links["Sentences"] + atoms_batch = get_atoms_batch(df_axiom_links["sub_tree"]) atom_tokenizer = AtomTokenizer(atom_map, max_atoms_in_sentence) atoms_batch_tokenized = atom_tokenizer.convert_batchs_to_ids(atoms_batch) @@ -51,14 +53,19 @@ print("atoms_tokens", atoms_batch_tokenized.shape) atoms_polarity_batch = find_pos_neg_idexes(max_atoms_in_sentence, df_axiom_links["sub_tree"]) print("atoms_polarity_batch", atoms_polarity_batch.shape) +torch.set_printoptions(edgeitems=20) truth_links_batch = get_axiom_links(max_atoms_in_one_type, atoms_polarity_batch, df_axiom_links["sub_tree"]) -print("truth_links_batch", truth_links_batch.permute(1, 0, 2).shape) -print(" truth_links_batch example on first sentence class cl_r", truth_links_batch[0][0]) +truth_links_batch = truth_links_batch.permute(1, 0, 2) +print("truth_links_batch", truth_links_batch.shape) +print("sentence", sentences_batch[14]) +print("categories ", df_axiom_links["sub_tree"][14]) +print("atoms_batch", atoms_batch[14]) +print("atoms_polarity_batch", atoms_polarity_batch[14]) +print(" truth_links_batch example on a sentence class n", truth_links_batch[14][2]) -sentences_batch = df_axiom_links["Sentences"] # Construction tensor dataset -dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch.permute(1, 0, 2)) +dataset = TensorDataset(atoms_batch_tokenized, atoms_polarity_batch, truth_links_batch) # Calculate the number of samples to include in each set. train_size = int(0.9 * len(dataset)) -- GitLab