From 5e6bb738171cbf5adfac79aa0e6191b030402a40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr>
Date: Tue, 15 Nov 2022 15:00:28 +0100
Subject: [PATCH] more tests for distance. All tests pass now

---
 .gitignore                                    |   1 +
 distance_batsim_output.py                     |   8 ++-
 ...test_distance.cpython-310-pytest-7.1.3.pyc | Bin 11438 -> 0 bytes
 test/input/3jobs.csv                          |   4 ++
 test/test_distance.py                         |  50 ++++++++++--------
 5 files changed, 39 insertions(+), 24 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc
 create mode 100644 test/input/3jobs.csv

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..264daca
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*__pycache__
\ No newline at end of file
diff --git a/distance_batsim_output.py b/distance_batsim_output.py
index 88f1a68..c4bcb3f 100755
--- a/distance_batsim_output.py
+++ b/distance_batsim_output.py
@@ -66,10 +66,14 @@ def lateness_distance(s1, s2):
     return np.sum([y-x for x, y in zip(s1, s2)])
 
 def normalized_euclidian_distance(s1, s2):
-    """Return the euclidien distance normalized by the l2 norm of the vectors"""
+    """Return the squared Euclidean distance divided by the product of the l2 norms of the vectors,
+    or None if either vector is the null vector (the ratio is then undefined)"""
 
+    n1, n2 = l2_norm(s1), l2_norm(s2)
+    if n1==0 or n2==0:
+        return None
     eucl_dist = euclidean_distance(s1, s2)
-    return eucl_dist**2 / (l2_norm(s1) * l2_norm(s2))
+    return eucl_dist**2 / (n1 * n2)
 
 def l2_norm(s):
     """Return the l2 norm of the series s"""
diff --git a/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc b/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc
deleted file mode 100644
index 6b77cc084415056a31fc3552d358e90347969c98..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 11438
zcmd1j<>g{vU|{G?EKU~TW?*;>;vi!d1_lNP1_p-WEes3{DGVu$ISf&ZV45kHnTdgs
zA(thJl@TPvl*1MRq8W48qu5h9vN+QjQkYX%dYPiQQaKiYc&v*U7cxe1LwRgq9uJhq
z4(9Pfc^qIKUkYamR|`WFe+qXBPYXkoKniaPUkgK&U<!YVKnp{ZP>NubaEefrNQ!Wj
zXo^UbSc+(hSPMgxc#3$6L<>WdL<(atgQnz5kbnF%8E-La-C|G6$xkdXGtp$c#hOx5
zS&*v9QpCu>!0-|j3NQIUtS^iV3{cS`R*--Q0|P@5I|E4cB}i=%CrAKJXtF#9S-`;X
zz`h7%3W7+6h(kz_dL9M_24_%U{$gfeC}AjJEMaVBs%1!FtYxfWsA0@zs%5HSDo!q8
zTELvbRKt|Un8FMaPh(ohlEPBLvVe6VLm5M{QVG)nwi1Sgj791tObggStUSgPmK0XF
z3S+Pe#)XVU7GM>O3mNkmQ&^EyxWH9-BC7zgYZw+Xf@#Kuj45n$Sj8A>nN!$PIBJ+P
z8O0cCSxPucm=<u>uq<S3W^7_iVPIjX1!1OIMzD{#Kt3vADB%M8jkSiMh85(m6wX?<
z8rB-NY^GYq8m6MA8pa8X#e!g;a@8=^Fs3o3aD&v=u%$7h_;x1Pr6AwV1Gx%|Z&!g;
zfPA|itODj+9(Z`{2CLv&$XIj$tbz+19y}m6$hSygdjYHxWZPAcEZjDPSr5S~KxRDy
ztALt?6t<thDnRb}4psqm4~SjE3JqJfg^Ve@b2!BqYS~lxQuu4w!C}h*4O<TMuw`MW
zWn^KfWvyi^VJP7);Rb~<YO0yQSlkK@O`Pdw0%NfQ*kue087D9nxq{pS_n!bPtR^rP
z2ZL2{gH=U<RdIvEOCSYVRT`42Y-Cj+HYilVG&yOAr-T>mKSAVNF@dr8D?$Hl0J{m|
zzpWtq;r<gs^50>wDju-^PJmVMfc+<wB8;T!8dw!CSk-N?DqgTEVGx^q|D`YlGiZwV
zfeW3LjK7#{ZC5f@S*mIjRGMfOb5%nb3bqO$mVsum28cG)EY<-F7-<%3a@8u>LZr<#
zi*E^~mL}(9rlcn3#iwKzmn7yTr`}>JHeAVii@BiE07Mvq2qO?-0wT<R@g)`)rxulf
z4YbrO1_@feWMg1pSjkibDxtuHCR0@s$ZFDEWoQWV2#5vsi8<6K7BHV!f_+kD1F{38
z4&rqX3%?GS-wlm5i*GR%8)<SD34)4aArK)9BEZG9C<6ln$YT~o;vg1FL8YPLEf8s3
z#gF8DLsQLSu(0VZ*3{gBl1gydev7g67Gs$vUlFK2DB=ehz~xX-S&~{@5}#OfOE5k;
zF()U!D7By{IX|}`u_*Nxk3&Iad{Sv<PDy57@h!HT{N%)(;#(5&#TogfIVoVJ@##7F
zNr^e}d5O8Hw>aaA6Vp;bCfwq41jj~Zex7SlQGU@a;rO)tqTIxi_|%GmoW#7ulFa<P
zA}Nr^<Uxc2h)@JoMx3{}LAFACV~or(L*|%5I1ocEATEV)3=JSWhzV#sBZwSSr3nhp
zY$ZdHF#`j`F9-dM{M=Oi+{Dxz{rvJg=bZe~6#er2qU_>=#N<@{q{Nck)I8mi{QR6^
zeNgCu33xiyE2z9B4ib*XOvBuuS`t*7USj;mn8xsc@d;y*DFXuoN|g$d2PIf=m8vpw
zs#I2PhFZ2725@D{l)_odUc*+y4y{;Q!4<0rxTr*~SlMdW(JIzC-~tX*)hz@SY4D;F
z(TrFNRspKWHi1<@E3y<sWw;Nl0#q4-3RU(JXk`de4Jsm$D%Q(jl_1-0fMnseA<TLL
zRsk~WC0GU2EToDRQ~|J+fZX#7tODvD5F1{xvcoG@4sgZF0j^j%p%p79u8Ng~p_Z+d
z9Z%)j1rAYsl`E)11vkqkFcx`%+ywU{a<vJnP}xc#m1`_m6}HM1RH4FD6(Fkuu|XjV
zrb(_`!Ih67r1B{OWw9b&P~j$+lUS0Pms(s5&zj&uuSft?)`)=!P$60*0b)sliXNt_
zWKg+HTHT-v(gG^ri7o1@Y)RGvuN_P@i@}8^p6a1U8ss0a&tyQM0ID5KP-_P>XzgHD
z1S)$qd5X+HT0j*-krIfd3?fuOger(o0}<*VLIXr-f(UrYt;toS1rpE(5jr437ewfR
z2z?M?0CEJlB7qjj#wdlZDGJXVTmVA|6OcwRa6yU@k)XP<n1g|V0aT6_Fg#%_vOzCJ
zLG>lbOW;y;3A7Y#X2j^01me#D#fsqk3(DbYpwtOZ{fP3!2&@8>f6c)vkn=BUr-Oy0
zP6rprwI$qHJhiN#ZU(GG6k(|4Na3pGOyR2Ks^P5R5@D$2uHmTR7H6pCN#U;LP2sNP
ztKqHT6Je<3ui>fT7iXvys9~)UXlASxtYN4T1a(_#n2JGV4XiuH0}@B;j?D%83gqua
zU|%tSn~5wbys*Y(@j9>yP|3C#tO8Q9@q*Z(yqv-ZR|zUb!TG)j(lCXT4SZmgNL>do
zpB>tj;Dpj#P?{G?^Fe9Bg^Vfub9lrUYK2k+QUq&+z$KzEv_urfQX;Z~N<$WgT6PwO
zS`HS5T22;*S}qobT5cAGS{@dLT3!~0T0RzrT7DLWT7g=@5{439Pz<C9p*7g^@TZsw
zjKwzKkbyMV9YMZ=ryAs@xIb7Gq$wT(R)xDMPFl+01G`%U&E2O6x_buLMG$v`;vdlo
z7lp;;1jgdkU{!qJE(EA+!d}7$P8*^iKY(H$OhZa>65I_g#l?_HaZSc515n{=3~P8B
zYZgOW(MUZAq>clqTw~8MiqFe0%7qkdEQnS#sM%}`BFw6gdkPks#gGycG$v96>Mj(4
z$3z&bR+D76kr~XDAQse_<}fjHxXVp7i}i{Ni%Pg^H8eE9y&y}?Vg)U5uLV?#Xewwc
zfQ1cB5aMQ<#hP#lBLldE5vb#&smWCfuGnrd<rQf1Abeb82P#3C3o1>EK>Y@Am!Sw$
zClr~33T6wCEK5P9kpZZyU}OX)%}{CzBMV4PVPx@=38V$wN(YVj6oH%S{6)4PQBWm9
zbj1NGl8ZniZ?{-L!B7M?5^S0Xq(u*@C5(zdbqTVF34{Y_xtk+%EQ>&u4@3l_+oA|w
z^%Plxyop@@Kx-U`N<(u<{R8DeI}%WyC9<56Aqo##i$PQxK|2yq9@GYiQ;ncDKzWvs
z$_>=dC<51TJdhd=l4?P%l44N%e;wmL#ukPhjC&Z1!qICsP)Y^Wec+ny8Uu95jR`!s
zfIj5LynqGEkQ?g)Ht3KWGi0ExNWFx40SCO4Vl9EGFfIY90M%_4U=`51ErkuP!Ue2?
zX(3~gCs+j&xGZ1;vB6~l^8$_<P+7nZ9dhGH;e-#lah5PI;KDZK1}+P@{2*hFh!&kJ
zC}$_;WLBl7Ks#5NNCO0jw1n2g)8sCS03}p-S|c{KYO>;P+<;>cOn{?N2^@_W?n55H
z0A;;mP(HfGSQN>?z)&R*(xac5S5R7_Z=98%R2*L(Uz}Q8oSC0jte0F|Rwad@penT}
zA0h{~5Jj_LX<l(@a&l^Maaw5(Sid5Y{@mnvLxYsW%HsG$GlO`LZ6GBm9Z%4RE<9mX
zASEnDh7#rlEG4W98T)w|AtStO%}k68;S8X0QWl10W=4iQ4nu}wsd7d}1`v#70PA5d
z;aJGn&j;1RiLM6}&4vucrpUTLVw#M8nq0RyVIx1qw?s-Zic(X-u118>Eq)|XP{7<`
zgN$a}Vl7WB$}7Ib9a@}P6rNa=mzkG-OBzXWeqL%ka$wzJD@x2PPA$I0;hkCu8L!|)
z2`{8fi->YwQ0T@d=cFcrEC98Qi$NnRTueO7Jd85`d02QDmHro{pl4rD2?$Ek;OxuC
z&A^btn9i8Ol+KvKoX*J1%*0&FP|H}$1Rix}tYJ)H>1D2EPGPNOsbQ{R$!4l$tzj*0
z1Xty3HLPijDeNF|aN~^wR!JA@gR5vz<IMzA8ACHRM+zrg1!T+=l(9h_Ja$m`GmkNa
z6G;WAVGQY(hJnq1WPQ#QF1Q&<AX`A0yC@x`0>sK=OyL5vnW4=zmW7Nd+;iB(7;4#4
zcv5(4*uc#+c4#w=9jTed3?2z)f%=;PtH1ve@HeDs0`fPcVFGEiBKvy}$ju<%799k+
z6U0LE_gT1#ODHN3&Dnb(6`;bc=rKqIh?U2f0%^{Y?e7%+T8<QfT283{Ichj-7*Yg#
zS!=mcSZld!xN5kwnQD1zcqT9wPk@F3Pa0#25J(0!_9rkF8-QF0iuDPMMW!G(g4lVC
zDM%qPfw9;btcnrbkn;emg2gdBolIaX2E{%&9ZX;>ibhriV%Km$<C$|IC<LH$ITIL*
z3qfXsLS_PEQ5jezBrYJK1Z&iDFJw#+p2H!=P|KSlk|J8e3r-z;(A2?)lsb?b>^!yH
zSVKz;OK33_gJzSMQ`l-ip(Tzrw2p(^3JReKj74WaZUwP1LhBY-6(qFogH>S*t+z<3
zKr<%roCVT>Bea-`L32-R;Lu_!f_I?cX^P^|0;ef1cv&KWT$W5=DhA~hPzd2DO(rlE
zLxy)i9RsE!$nXv%Hj&d4Q!!+C2NXg~MUdegSO`hN%8Ut2#dY8i;#$Z!fvKn&6e`f+
zHAzItGJ&ahB1jdeM47-;G!3K*#HLE=!d1dp!vrl|P(y;b7?fA=hs0fwyFlq<0#nf=
zkh?%^a9Tn3;|H)Rh#x`2d$1uyNksWNfw`C&tO^uf%tfFS08c9^Qt<F%E*1l+s$p8l
zIDxrH8e}7goyV9W1!j|7-ayj?tellbNfXS)yP#zZt~9}13@-gazMsGhDgQxia7Z9Z
z59Z=hkn2IE>;&ebDv*moY;cK-2ruU14zMavcrh3Cf>l8V6C@FX2oso#XM<FMa|v_N
z0+1>Y8=gzZ4hfX9gsTQr@-jk8UKx~<m!;Sj8Y-Z&1Y60=Tnx$opq#>71j+xHIfbQ|
z0pt@<NK9ZUVg>mG#6}AhmSWJH92+QiOkgRJ0IPzA3W$wTmcZ-e2`t5WAhSWaWdchP
zXvUl!#D?b_ipvtD&;sQoP-rpX2rbaiHz+sZ3N4o67LZ#(>1F~;Q76b(AT~G+AyNlR
z@pP~%Q0ibQ0uAqD%S|lB@cHryEJesO=A0mNQ9=umyI6{kg3Jcxs0l1ZpaBwg5F4Jm
zs2N&}C6KfQtJP7{7He@efwaY149fkGp?KCJ1(2`cp@qnCtdRHzrKbt3koX6&(b5)c
zF=$u{lD1fj+`wkxN?Y)D&ji-u7;tETQa5W+B1jiX4x~J^K(olQez&;OGV?NvGvZ4!
zb5n2e7ndgGW`cV~5CPuelEk8t%)E3k7qsLhEi*MI1w8i*9{$&4tnvU2-k8FMUrgb{
zFGvGN@Tq4V1+acwkQQU`{4H#}9W*!;1?o9Qg9Z_pi%XL<*^tIp!1J=8L6#y%kRs5)
zJIe5hp#fz0#L(atb6RGeTZks}El&8P;4My2ZxP&&Ey@5*KQLD1gPdRqb2VsS2r?s%
z-PLfDz%DldO+mv=1Phsggb;o+NAa5>XzYjs-8Z07^CEkYZ{R~lpm8BXBUF!p<{?0X
zNSe$=IiR57fQ`_@e3D10PhbWT=aH92AScIwoWcrD9WO!Mt|HJ-l_qx)JPL{J_ZNXo
z(&R772AP@(A__r-C&+xH*=cj|z*CVoNEkfA1R3KoG=z*AfJU&2eBhFh!68FK@Yob&
zED5UKxX21*Jh*2GCcq;K!r<;@aei(peEzB!G*?`FjD>+gj8TD^iIML=6C=xi7Dl%J
zOpHumHi!oCK^Vja$%AMRAA~_{{9-Invq0v7umqGZ@Lz$6-ZttX*~kHP2gI)+zpyY8
zBZlrDP<Wy1LstXyv&8>_cYzWMBGl2{&Gw(TFu<k`m!AdxD^VfT2fK}WR0vh{5FBbL
zYoOD%%>SZtP`xL4iz5lT#;6E1>wHTx9x{%n2PrZ$^YiqIQp<}nOHyxf!i8?JL&d;_
z`YkcoU}1bxVo7mkZhU@eNkM5zQ7owZWhzLy#TJ}el$lx#o)-d7mw+eIk;aTcWh;0Z
z89d?+9-%D))%)PepeO-ULW4$2!2^0=hhrTS<gm%jPbtkwwF51wEe1`eD=}~|@-gu-
YLLdh-2O9?~2OA4J3nK_}F!AsL0KQn2(f|Me

diff --git a/test/input/3jobs.csv b/test/input/3jobs.csv
new file mode 100644
index 0000000..e899f77
--- /dev/null
+++ b/test/input/3jobs.csv
@@ -0,0 +1,4 @@
+job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata
+1216,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,362.000000,80,0.000000,362.000000,1.000000,2,62671.250000,
+247,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,57102.000000,0,0.000000,57102.000000,1.000000,0,12391134.000000,
+1242,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,9620.000000,60,0.000000,9620.000000,1.000000,2,1665462.500000,
\ No newline at end of file
diff --git a/test/test_distance.py b/test/test_distance.py
index af1fbeb..536faec 100644
--- a/test/test_distance.py
+++ b/test/test_distance.py
@@ -32,6 +32,7 @@ def test_normalized_euclidean_distance():
 
 
 ####### Integration tests #######
+three_jobs = "test/input/3jobs.csv"
 three_jobs_w_session = "test/input/3jobs_w_sessions.csv"
 three_jobs_zero = "test/input/3jobs_zeros.csv"
 three_jobs_one_unsuccessful = "test/input/3jobs_1unsuccessful.csv"
@@ -43,38 +44,43 @@ def test_cleaning():
     
     # Clean unsuccessful jobs:
     with pytest.warns(UserWarning):
-        distances(three_jobs_w_session, three_jobs_one_unsuccessful)
+        distances(three_jobs, three_jobs_one_unsuccessful)
 
     # Complain if no matching job_ids:
     with pytest.raises(KeyError):
-        distances(three_jobs_w_session, mc_10days_a60)
+        distances(three_jobs, mc_10days_a60)
 
 
 def test_some_distances():
     fin, sub, start = ["finish_time"], ["submission_time"], ["starting_time"]
 
     # d(u,u) == 0 for all distances
-    assert distances(three_jobs_w_session, three_jobs_w_session, 
-        field=sub) == 0 
-    assert distances(three_jobs_w_session, three_jobs_w_session, 
-        field=fin) == 0 
-    assert distances(three_jobs_w_session, three_jobs_w_session, 
-        euclidean=False, norm_eucl=True, field=fin) == 0 
-    assert distances(three_jobs_w_session, three_jobs_w_session, 
-        euclidean=False, lateness=True, field=fin) == 0 
+    assert distances(three_jobs, three_jobs, field=sub) == 0 
+    assert distances(three_jobs, three_jobs, field=fin) == 0 
+    assert distances(three_jobs, three_jobs, 
+        euclidean=False, norm_eucl=True, field=fin)     == 0 
+    assert distances(three_jobs, three_jobs, 
+        euclidean=False, lateness=True, field=fin)      == 0 
 
     # Eucl distance
-    assert distances(three_jobs_w_session, three_jobs_zero,
-        field=sub) == 50
-    assert distances(three_jobs_zero, three_jobs_w_session,
-        field=start) == 50
-    assert distances(three_jobs_w_session, three_jobs_zero,
-        field=fin) == 100
+    assert distances(three_jobs, three_jobs_zero, field=sub)    == 50
+    assert distances(three_jobs_zero, three_jobs, field=start)  == 50
+    assert distances(three_jobs, three_jobs_zero, field=fin)    == 100
+
+    assert distances(three_jobs, three_jobs_w_session, field=sub)   == 0
+    assert distances(three_jobs, three_jobs_w_session, field=start) == 0
+    assert distances(three_jobs, three_jobs_w_session, field=fin)   == 20 * np.sqrt(2)
 
     # Normalized eucl distance
-    assert distances(three_jobs_w_session, three_jobs_zero, 
-        euclidean=False, norm_eucl=True, field=sub) ==          1
-    assert distances(three_jobs_zero, three_jobs_w_session,
-        euclidean=False, norm_eucl=True, field=start) ==        1
-    assert distances(three_jobs_w_session, three_jobs_zero,
-        euclidean=False, norm_eucl=True, field=fin) ==          1
\ No newline at end of file
+    assert distances(three_jobs, three_jobs_zero, 
+        euclidean=False, norm_eucl=True, field=sub)           == None
+    assert distances(three_jobs, three_jobs_w_session,
+        euclidean=False, norm_eucl=True, field=sub)           == 0
+    
+    norm_dis_A_B = distances(three_jobs, three_jobs_w_session,
+                        euclidean=False, norm_eucl=True, field=fin)
+    norm_dis_B_A = distances(three_jobs_w_session, three_jobs,  # arguments swapped: d(B, A)
+                        euclidean=False, norm_eucl=True, field=fin)
+    expected = 800 / (100*100)  # squared eucl. distance (20*sqrt(2))**2 over the product of the l2 norms
+    assert norm_dis_A_B == norm_dis_B_A  # symmetry: d(A, B) == d(B, A)
+    assert abs(norm_dis_B_A - expected) < 1e-8  # two-sided tolerance; a too-small result must fail as well
-- 
GitLab