diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..264dacab5a7aa25c1c26c6c3b20d48d637fe4b49 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*__pycache__ \ No newline at end of file diff --git a/distance_batsim_output.py b/distance_batsim_output.py index 88f1a68d1243e7b0d507df3007b6d754a0228315..c4bcb3f5482044d4d9ff412935be06b49cb8b031 100755 --- a/distance_batsim_output.py +++ b/distance_batsim_output.py @@ -66,10 +66,14 @@ def lateness_distance(s1, s2): return np.sum([y-x for x, y in zip(s1, s2)]) def normalized_euclidian_distance(s1, s2): - """Return the euclidien distance normalized by the l2 norm of the vectors""" + """Return the squared Euclidean distance normalized by the l2 norms of the vectors, + or None if one of the vectors is the null vector (undefined)""" + n1, n2 = l2_norm(s1), l2_norm(s2) + if n1==0 or n2==0: + return None eucl_dist = euclidean_distance(s1, s2) - return eucl_dist**2 / (l2_norm(s1) * l2_norm(s2)) + return eucl_dist**2 / (n1 * n2) def l2_norm(s): """Return the l2 norm of the series s""" diff --git a/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc b/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc deleted file mode 100644 index 6b77cc084415056a31fc3552d358e90347969c98..0000000000000000000000000000000000000000 Binary files a/test/__pycache__/test_distance.cpython-310-pytest-7.1.3.pyc and /dev/null differ diff --git a/test/input/3jobs.csv b/test/input/3jobs.csv new file mode 100644 index 0000000000000000000000000000000000000000..e899f7717c2c1243dfa3ccf67f386bb48d190563 --- /dev/null +++ b/test/input/3jobs.csv @@ -0,0 +1,4 @@ +job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata +1216,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,362.000000,80,0.000000,362.000000,1.000000,2,62671.250000, 
+247,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,57102.000000,0,0.000000,57102.000000,1.000000,0,12391134.000000, +1242,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,9620.000000,60,0.000000,9620.000000,1.000000,2,1665462.500000, \ No newline at end of file diff --git a/test/test_distance.py b/test/test_distance.py index af1fbeb90cffb1d8adbfa675ec1644338a193196..536faec44abe55a0bbff11e7da32c3acd5e8856c 100644 --- a/test/test_distance.py +++ b/test/test_distance.py @@ -32,6 +32,7 @@ def test_normalized_euclidean_distance(): ####### Integration tests ####### +three_jobs = "test/input/3jobs.csv" three_jobs_w_session = "test/input/3jobs_w_sessions.csv" three_jobs_zero = "test/input/3jobs_zeros.csv" three_jobs_one_unsuccessful = "test/input/3jobs_1unsuccessful.csv" @@ -43,38 +44,43 @@ def test_cleaning(): # Clean unsuccessful jobs: with pytest.warns(UserWarning): - distances(three_jobs_w_session, three_jobs_one_unsuccessful) + distances(three_jobs, three_jobs_one_unsuccessful) # Complain if no matching job_ids: with pytest.raises(KeyError): - distances(three_jobs_w_session, mc_10days_a60) + distances(three_jobs, mc_10days_a60) def test_some_distances(): fin, sub, start = ["finish_time"], ["submission_time"], ["starting_time"] # d(u,u) == 0 for all distances - assert distances(three_jobs_w_session, three_jobs_w_session, - field=sub) == 0 - assert distances(three_jobs_w_session, three_jobs_w_session, - field=fin) == 0 - assert distances(three_jobs_w_session, three_jobs_w_session, - euclidean=False, norm_eucl=True, field=fin) == 0 - assert distances(three_jobs_w_session, three_jobs_w_session, - euclidean=False, lateness=True, field=fin) == 0 + assert distances(three_jobs, three_jobs, field=sub) == 0 + assert distances(three_jobs, three_jobs, field=fin) == 0 + assert distances(three_jobs, three_jobs, + euclidean=False, norm_eucl=True, field=fin) == 0 + assert distances(three_jobs, three_jobs, + euclidean=False, lateness=True, field=fin) == 0 # Eucl 
distance - assert distances(three_jobs_w_session, three_jobs_zero, - field=sub) == 50 - assert distances(three_jobs_zero, three_jobs_w_session, - field=start) == 50 - assert distances(three_jobs_w_session, three_jobs_zero, - field=fin) == 100 + assert distances(three_jobs, three_jobs_zero, field=sub) == 50 + assert distances(three_jobs_zero, three_jobs, field=start) == 50 + assert distances(three_jobs, three_jobs_zero, field=fin) == 100 + + assert distances(three_jobs, three_jobs_w_session, field=sub) == 0 + assert distances(three_jobs, three_jobs_w_session, field=start) == 0 + assert np.isclose(distances(three_jobs, three_jobs_w_session, field=fin), 20 * np.sqrt(2)) # Normalized eucl distance - assert distances(three_jobs_w_session, three_jobs_zero, - euclidean=False, norm_eucl=True, field=sub) == 1 - assert distances(three_jobs_zero, three_jobs_w_session, - euclidean=False, norm_eucl=True, field=start) == 1 - assert distances(three_jobs_w_session, three_jobs_zero, - euclidean=False, norm_eucl=True, field=fin) == 1 \ No newline at end of file + assert distances(three_jobs, three_jobs_zero, + euclidean=False, norm_eucl=True, field=sub) is None # undefined for a null vector + assert distances(three_jobs, three_jobs_w_session, + euclidean=False, norm_eucl=True, field=sub) == 0 + + norm_dis_A_B = distances(three_jobs, three_jobs_w_session, + euclidean=False, norm_eucl=True, field=fin) + norm_dis_B_A = distances(three_jobs_w_session, three_jobs, # swapped arguments: symmetry check + euclidean=False, norm_eucl=True, field=fin) + expected = 800 / (100*100) # squared distance / (norm_A * norm_B) + assert np.isclose(norm_dis_A_B, norm_dis_B_A) + assert abs(norm_dis_B_A - expected) < 1e-8