From 5e6bb738171cbf5adfac79aa0e6191b030402a40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr> Date: Tue, 15 Nov 2022 15:00:28 +0100 Subject: [PATCH] more tests for distance. All tests pass now --- .gitignore | 1 + distance_batsim_output.py | 8 ++- ...test_distance.cpython-310-pytest-7.1.3.pyc | Bin 11438 -> 0 bytes test/input/3jobs.csv | 4 ++ test/test_distance.py | 50 ++++++++++-------- 5 files changed, 39 insertions(+), 24 deletions(-) create mode 100644 .gitignore delete mode 100644 test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc create mode 100644 test/input/3jobs.csv diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..264daca --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*__pycache__ \ No newline at end of file diff --git a/distance_batsim_output.py b/distance_batsim_output.py index 88f1a68..c4bcb3f 100755 --- a/distance_batsim_output.py +++ b/distance_batsim_output.py @@ -66,10 +66,14 @@ def lateness_distance(s1, s2): return np.sum([y-x for x, y in zip(s1, s2)]) def normalized_euclidian_distance(s1, s2): - """Return the euclidien distance normalized by the l2 norm of the vectors""" + """Return the euclidien distance normalized by the l2 norm of the vectors, + or None if one of the vectors is the null vector (undefined)""" + n1, n2 = l2_norm(s1), l2_norm(s2) + if n1==0 or n2==0: + return None eucl_dist = euclidean_distance(s1, s2) - return eucl_dist**2 / (l2_norm(s1) * l2_norm(s2)) + return eucl_dist**2 / (n1 * n2) def l2_norm(s): """Return the l2 norm of the series s""" diff --git a/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc b/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc deleted file mode 100644 index 6b77cc084415056a31fc3552d358e90347969c98..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11438 zcmd1j<>g{vU|{G?EKU~TW?*;>;vi!d1_lNP1_p-WEes3{DGVu$ISf&ZV45kHnTdgs zA(thJl@TPvl*1MRq8W48qu5h9vN+QjQkYX%dYPiQQaKiYc&v*U7cxe1LwRgq9uJhq z4(9Pfc^qIKUkYamR|`WFe+qXBPYXkoKniaPUkgK&U<!YVKnp{ZP>NubaEefrNQ!Wj zXo^UbSc+(hSPMgxc#3$6L<>WdL<(atgQnz5kbnF%8E-La-C|G6$xkdXGtp$c#hOx5 zS&*v9QpCu>!0-|j3NQIUtS^iV3{cS`R*--Q0|P@5I|E4cB}i=%CrAKJXtF#9S-`;X zz`h7%3W7+6h(kz_dL9M_24_%U{$gfeC}AjJEMaVBs%1!FtYxfWsA0@zs%5HSDo!q8 zTELvbRKt|Un8FMaPh(ohlEPBLvVe6VLm5M{QVG)nwi1Sgj791tObggStUSgPmK0XF z3S+Pe#)XVU7GM>O3mNkmQ&^EyxWH9-BC7zgYZw+Xf@#Kuj45n$Sj8A>nN!$PIBJ+P z8O0cCSxPucm=<u>uq<S3W^7_iVPIjX1!1OIMzD{#Kt3vADB%M8jkSiMh85(m6wX?< z8rB-NY^GYq8m6MA8pa8X#e!g;a@8=^Fs3o3aD&v=u%$7h_;x1Pr6AwV1Gx%|Z&!g; zfPA|itODj+9(Z`{2CLv&$XIj$tbz+19y}m6$hSygdjYHxWZPAcEZjDPSr5S~KxRDy ztALt?6t<thDnRb}4psqm4~SjE3JqJfg^Ve@b2!BqYS~lxQuu4w!C}h*4O<TMuw`MW zWn^KfWvyi^VJP7);Rb~<YO0yQSlkK@O`Pdw0%NfQ*kue087D9nxq{pS_n!bPtR^rP z2ZL2{gH=U<RdIvEOCSYVRT`42Y-Cj+HYilVG&yOAr-T>mKSAVNF@dr8D?$Hl0J{m| zzpWtq;r<gs^50>wDju-^PJmVMfc+<wB8;T!8dw!CSk-N?DqgTEVGx^q|D`YlGiZwV zfeW3LjK7#{ZC5f@S*mIjRGMfOb5%nb3bqO$mVsum28cG)EY<-F7-<%3a@8u>LZr<# zi*E^~mL}(9rlcn3#iwKzmn7yTr`}>JHeAVii@BiE07Mvq2qO?-0wT<R@g)`)rxulf z4YbrO1_@feWMg1pSjkibDxtuHCR0@s$ZFDEWoQWV2#5vsi8<6K7BHV!f_+kD1F{38 z4&rqX3%?GS-wlm5i*GR%8)<SD34)4aArK)9BEZG9C<6ln$YT~o;vg1FL8YPLEf8s3 z#gF8DLsQLSu(0VZ*3{gBl1gydev7g67Gs$vUlFK2DB=ehz~xX-S&~{@5}#OfOE5k; zF()U!D7By{IX|}`u_*Nxk3&Iad{Sv<PDy57@h!HT{N%)(;#(5&#TogfIVoVJ@##7F zNr^e}d5O8Hw>aaA6Vp;bCfwq41jj~Zex7SlQGU@a;rO)tqTIxi_|%GmoW#7ulFa<P zA}Nr^<Uxc2h)@JoMx3{}LAFACV~or(L*|%5I1ocEATEV)3=JSWhzV#sBZwSSr3nhp zY$ZdHF#`j`F9-dM{M=Oi+{Dxz{rvJg=bZe~6#er2qU_>=#N<@{q{Nck)I8mi{QR6^ zeNgCu33xiyE2z9B4ib*XOvBuuS`t*7USj;mn8xsc@d;y*DFXuoN|g$d2PIf=m8vpw zs#I2PhFZ2725@D{l)_odUc*+y4y{;Q!4<0rxTr*~SlMdW(JIzC-~tX*)hz@SY4D;F z(TrFNRspKWHi1<@E3y<sWw;Nl0#q4-3RU(JXk`de4Jsm$D%Q(jl_1-0fMnseA<TLL zRsk~WC0GU2EToDRQ~|J+fZX#7tODvD5F1{xvcoG@4sgZF0j^j%p%p79u8Ng~p_Z+d z9Z%)j1rAYsl`E)11vkqkFcx`%+ywU{a<vJnP}xc#m1`_m6}HM1RH4FD6(Fkuu|XjV zrb(_`!Ih67r1B{OWw9b&P~j$+lUS0Pms(s5&zj&uuSft?)`)=!P$60*0b)sliXNt_ zWKg+HTHT-v(gG^ri7o1@Y)RGvuN_P@i@}8^p6a1U8ss0a&tyQM0ID5KP-_P>XzgHD z1S)$qd5X+HT0j*-krIfd3?fuOger(o0}<*VLIXr-f(UrYt;toS1rpE(5jr437ewfR z2z?M?0CEJlB7qjj#wdlZDGJXVTmVA|6OcwRa6yU@k)XP<n1g|V0aT6_Fg#%_vOzCJ zLG>lbOW;y;3A7Y#X2j^01me#D#fsqk3(DbYpwtOZ{fP3!2&@8>f6c)vkn=BUr-Oy0 zP6rprwI$qHJhiN#ZU(GG6k(|4Na3pGOyR2Ks^P5R5@D$2uHmTR7H6pCN#U;LP2sNP ztKqHT6Je<3ui>fT7iXvys9~)UXlASxtYN4T1a(_#n2JGV4XiuH0}@B;j?D%83gqua zU|%tSn~5wbys*Y(@j9>yP|3C#tO8Q9@q*Z(yqv-ZR|zUb!TG)j(lCXT4SZmgNL>do zpB>tj;Dpj#P?{G?^Fe9Bg^Vfub9lrUYK2k+QUq&+z$KzEv_urfQX;Z~N<$WgT6PwO zS`HS5T22;*S}qobT5cAGS{@dLT3!~0T0RzrT7DLWT7g=@5{439Pz<C9p*7g^@TZsw zjKwzKkbyMV9YMZ=ryAs@xIb7Gq$wT(R)xDMPFl+01G`%U&E2O6x_buLMG$v`;vdlo z7lp;;1jgdkU{!qJE(EA+!d}7$P8*^iKY(H$OhZa>65I_g#l?_HaZSc515n{=3~P8B zYZgOW(MUZAq>clqTw~8MiqFe0%7qkdEQnS#sM%}`BFw6gdkPks#gGycG$v96>Mj(4 z$3z&bR+D76kr~XDAQse_<}fjHxXVp7i}i{Ni%Pg^H8eE9y&y}?Vg)U5uLV?#Xewwc zfQ1cB5aMQ<#hP#lBLldE5vb#&smWCfuGnrd<rQf1Abeb82P#3C3o1>EK>Y@Am!Sw$ zClr~33T6wCEK5P9kpZZyU}OX)%}{CzBMV4PVPx@=38V$wN(YVj6oH%S{6)4PQBWm9 zbj1NGl8ZniZ?{-L!B7M?5^S0Xq(u*@C5(zdbqTVF34{Y_xtk+%EQ>&u4@3l_+oA|w z^%Plxyop@@Kx-U`N<(u<{R8DeI}%WyC9<56Aqo##i$PQxK|2yq9@GYiQ;ncDKzWvs z$_>=dC<51TJdhd=l4?P%l44N%e;wmL#ukPhjC&Z1!qICsP)Y^Wec+ny8Uu95jR`!s zfIj5LynqGEkQ?g)Ht3KWGi0ExNWFx40SCO4Vl9EGFfIY90M%_4U=`51ErkuP!Ue2? zX(3~gCs+j&xGZ1;vB6~l^8$_<P+7nZ9dhGH;e-#lah5PI;KDZK1}+P@{2*hFh!&kJ zC}$_;WLBl7Ks#5NNCO0jw1n2g)8sCS03}p-S|c{KYO>;P+<;>cOn{?N2^@_W?n55H z0A;;mP(HfGSQN>?z)&R*(xac5S5R7_Z=98%R2*L(Uz}Q8oSC0jte0F|Rwad@penT} zA0h{~5Jj_LX<l(@a&l^Maaw5(Sid5Y{@mnvLxYsW%HsG$GlO`LZ6GBm9Z%4RE<9mX zASEnDh7#rlEG4W98T)w|AtStO%}k68;S8X0QWl10W=4iQ4nu}wsd7d}1`v#70PA5d z;aJGn&j;1RiLM6}&4vucrpUTLVw#M8nq0RyVIx1qw?s-Zic(X-u118>Eq)|XP{7<` zgN$a}Vl7WB$}7Ib9a@}P6rNa=mzkG-OBzXWeqL%ka$wzJD@x2PPA$I0;hkCu8L!|) z2`{8fi->YwQ0T@d=cFcrEC98Qi$NnRTueO7Jd85`d02QDmHro{pl4rD2?$Ek;OxuC z&A^btn9i8Ol+KvKoX*J1%*0&FP|H}$1Rix}tYJ)H>1D2EPGPNOsbQ{R$!4l$tzj*0 z1Xty3HLPijDeNF|aN~^wR!JA@gR5vz<IMzA8ACHRM+zrg1!T+=l(9h_Ja$m`GmkNa z6G;WAVGQY(hJnq1WPQ#QF1Q&<AX`A0yC@x`0>sK=OyL5vnW4=zmW7Nd+;iB(7;4#4 zcv5(4*uc#+c4#w=9jTed3?2z)f%=;PtH1ve@HeDs0`fPcVFGEiBKvy}$ju<%799k+ z6U0LE_gT1#ODHN3&Dnb(6`;bc=rKqIh?U2f0%^{Y?e7%+T8<QfT283{Ichj-7*Yg# zS!=mcSZld!xN5kwnQD1zcqT9wPk@F3Pa0#25J(0!_9rkF8-QF0iuDPMMW!G(g4lVC zDM%qPfw9;btcnrbkn;emg2gdBolIaX2E{%&9ZX;>ibhriV%Km$<C$|IC<LH$ITIL* z3qfXsLS_PEQ5jezBrYJK1Z&iDFJw#+p2H!=P|KSlk|J8e3r-z;(A2?)lsb?b>^!yH zSVKz;OK33_gJzSMQ`l-ip(Tzrw2p(^3JReKj74WaZUwP1LhBY-6(qFogH>S*t+z<3 zKr<%roCVT>Bea-`L32-R;Lu_!f_I?cX^P^|0;ef1cv&KWT$W5=DhA~hPzd2DO(rlE zLxy)i9RsE!$nXv%Hj&d4Q!!+C2NXg~MUdegSO`hN%8Ut2#dY8i;#$Z!fvKn&6e`f+ zHAzItGJ&ahB1jdeM47-;G!3K*#HLE=!d1dp!vrl|P(y;b7?fA=hs0fwyFlq<0#nf= zkh?%^a9Tn3;|H)Rh#x`2d$1uyNksWNfw`C&tO^uf%tfFS08c9^Qt<F%E*1l+s$p8l zIDxrH8e}7goyV9W1!j|7-ayj?tellbNfXS)yP#zZt~9}13@-gazMsGhDgQxia7Z9Z z59Z=hkn2IE>;&ebDv*moY;cK-2ruU14zMavcrh3Cf>l8V6C@FX2oso#XM<FMa|v_N z0+1>Y8=gzZ4hfX9gsTQr@-jk8UKx~<m!;Sj8Y-Z&1Y60=Tnx$opq#>71j+xHIfbQ| z0pt@<NK9ZUVg>mG#6}AhmSWJH92+QiOkgRJ0IPzA3W$wTmcZ-e2`t5WAhSWaWdchP zXvUl!#D?b_ipvtD&;sQoP-rpX2rbaiHz+sZ3N4o67LZ#(>1F~;Q76b(AT~G+AyNlR z@pP~%Q0ibQ0uAqD%S|lB@cHryEJesO=A0mNQ9=umyI6{kg3Jcxs0l1ZpaBwg5F4Jm zs2N&}C6KfQtJP7{7He@efwaY149fkGp?KCJ1(2`cp@qnCtdRHzrKbt3koX6&(b5)c zF=$u{lD1fj+`wkxN?Y)D&ji-u7;tETQa5W+B1jiX4x~J^K(olQez&;OGV?NvGvZ4! zb5n2e7ndgGW`cV~5CPuelEk8t%)E3k7qsLhEi*MI1w8i*9{$&4tnvU2-k8FMUrgb{ zFGvGN@Tq4V1+acwkQQU`{4H#}9W*!;1?o9Qg9Z_pi%XL<*^tIp!1J=8L6#y%kRs5) zJIe5hp#fz0#L(atb6RGeTZks}El&8P;4My2ZxP&&Ey@5*KQLD1gPdRqb2VsS2r?s% z-PLfDz%DldO+mv=1Phsggb;o+NAa5>XzYjs-8Z07^CEkYZ{R~lpm8BXBUF!p<{?0X zNSe$=IiR57fQ`_@e3D10PhbWT=aH92AScIwoWcrD9WO!Mt|HJ-l_qx)JPL{J_ZNXo z(&R772AP@(A__r-C&+xH*=cj|z*CVoNEkfA1R3KoG=z*AfJU&2eBhFh!68FK@Yob& zED5UKxX21*Jh*2GCcq;K!r<;@aei(peEzB!G*?`FjD>+gj8TD^iIML=6C=xi7Dl%J zOpHumHi!oCK^Vja$%AMRAA~_{{9-Invq0v7umqGZ@Lz$6-ZttX*~kHP2gI)+zpyY8 zBZlrDP<Wy1LstXyv&8>_cYzWMBGl2{&Gw(TFu<k`m!AdxD^VfT2fK}WR0vh{5FBbL zYoOD%%>SZtP`xL4iz5lT#;6E1>wHTx9x{%n2PrZ$^YiqIQp<}nOHyxf!i8?JL&d;_ z`YkcoU}1bxVo7mkZhU@eNkM5zQ7owZWhzLy#TJ}el$lx#o)-d7mw+eIk;aTcWh;0Z z89d?+9-%D))%)PepeO-ULW4$2!2^0=hhrTS<gm%jPbtkwwF51wEe1`eD=}~|@-gu- YLLdh-2O9?~2OA4J3nK_}F!AsL0KQn2(f|Me diff --git a/test/input/3jobs.csv b/test/input/3jobs.csv new file mode 100644 index 0000000..e899f77 --- /dev/null +++ b/test/input/3jobs.csv @@ -0,0 +1,4 @@ +job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata +1216,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,362.000000,80,0.000000,362.000000,1.000000,2,62671.250000, +247,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,57102.000000,0,0.000000,57102.000000,1.000000,0,12391134.000000, +1242,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,9620.000000,60,0.000000,9620.000000,1.000000,2,1665462.500000, \ No newline at end of file diff --git a/test/test_distance.py b/test/test_distance.py index af1fbeb..536faec 100644 --- a/test/test_distance.py +++ b/test/test_distance.py @@ -32,6 +32,7 @@ def test_normalized_euclidean_distance(): ####### Integration tests ####### +three_jobs = "test/input/3jobs.csv" three_jobs_w_session = "test/input/3jobs_w_sessions.csv" three_jobs_zero = "test/input/3jobs_zeros.csv" three_jobs_one_unsuccessful = "test/input/3jobs_1unsuccessful.csv" @@ -43,38 +44,43 @@ def test_cleaning(): # Clean unsuccessful jobs: with pytest.warns(UserWarning): - distances(three_jobs_w_session, three_jobs_one_unsuccessful) + distances(three_jobs, three_jobs_one_unsuccessful) # Complain if no matching job_ids: with pytest.raises(KeyError): - distances(three_jobs_w_session, mc_10days_a60) + distances(three_jobs, mc_10days_a60) def test_some_distances(): fin, sub, start = ["finish_time"], ["submission_time"], ["starting_time"] # d(u,u) == 0 for all distances - assert distances(three_jobs_w_session, three_jobs_w_session, - field=sub) == 0 - assert distances(three_jobs_w_session, three_jobs_w_session, - field=fin) == 0 - assert distances(three_jobs_w_session, three_jobs_w_session, - euclidean=False, norm_eucl=True, field=fin) == 0 - assert distances(three_jobs_w_session, three_jobs_w_session, - euclidean=False, lateness=True, field=fin) == 0 + assert distances(three_jobs, three_jobs, field=sub) == 0 + assert distances(three_jobs, three_jobs, field=fin) == 0 + assert distances(three_jobs, three_jobs, + euclidean=False, norm_eucl=True, field=fin) == 0 + assert distances(three_jobs, three_jobs, + euclidean=False, lateness=True, field=fin) == 0 # Eucl distance - assert distances(three_jobs_w_session, three_jobs_zero, - field=sub) == 50 - assert distances(three_jobs_zero, three_jobs_w_session, - field=start) == 50 - assert distances(three_jobs_w_session, three_jobs_zero, - field=fin) == 100 + assert distances(three_jobs, three_jobs_zero, field=sub) == 50 + assert distances(three_jobs_zero, three_jobs, field=start) == 50 + assert distances(three_jobs, three_jobs_zero, field=fin) == 100 + + assert distances(three_jobs, three_jobs_w_session, field=sub) == 0 + assert distances(three_jobs, three_jobs_w_session, field=start) == 0 + assert distances(three_jobs, three_jobs_w_session, field=fin) == 20 * np.sqrt(2) # Normalized eucl distance - assert distances(three_jobs_w_session, three_jobs_zero, - euclidean=False, norm_eucl=True, field=sub) == 1 - assert distances(three_jobs_zero, three_jobs_w_session, - euclidean=False, norm_eucl=True, field=start) == 1 - assert distances(three_jobs_w_session, three_jobs_zero, - euclidean=False, norm_eucl=True, field=fin) == 1 \ No newline at end of file + assert distances(three_jobs, three_jobs_zero, + euclidean=False, norm_eucl=True, field=sub) == None + assert distances(three_jobs, three_jobs_w_session, + euclidean=False, norm_eucl=True, field=sub) == 0 + + norm_dis_A_B = distances(three_jobs, three_jobs_w_session, + euclidean=False, norm_eucl=True, field=fin) + norm_dis_B_A = distances(three_jobs, three_jobs_w_session, + euclidean=False, norm_eucl=True, field=fin) + expected = 800 / (100*100) + assert norm_dis_A_B == norm_dis_B_A + assert norm_dis_B_A - expected < 1e-8 -- GitLab