diff --git a/test/distance_batsim_output.py b/test/distance_batsim_output.py index b8938a8bb8f3c2c4b64a82139c6d71d166b50414..30aaa2b764b84621ab84c5a8489e4adcbcf710a1 100755 --- a/test/distance_batsim_output.py +++ b/test/distance_batsim_output.py @@ -49,8 +49,20 @@ def open_and_compare(file1, file2): out1 = out1[out1.success == 1] out2 = out2[out2.success == 1] + runtimes1 = out1.finish_time - out1.starting_time + runtimes2 = out2.finish_time - out2.starting_time + + if not np.allclose(runtimes1, runtimes2, atol=1): + warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.") + return out1, out2 +def sum_execution_times(out): + """Returns the sum of all job execution times from a clean batsim output""" + + runtimes = out.finish_time - out.starting_time + return runtimes.sum() + def euclidean_distance(s1, s2): """Returns the Euclidean distance between two series s1 and s2""" @@ -64,21 +76,6 @@ def lateness_distance(s1, s2): return float(np.sum([y-x for x, y in zip(s1, s2)])) -def normalized_euclidian_distance(s1, s2): - """Return the euclidien distance normalized by the l2 norm of the vectors, - or None if one of the vectors is the null vector (undefined)""" - - n1, n2 = l2_norm(s1), l2_norm(s2) - if n1==0 or n2==0: - return None - eucl_dist = euclidean_distance(s1, s2) - return float( eucl_dist**2 / (n1 * n2) ) - -def l2_norm(s): - """Return the l2 norm of the series s""" - - return float( np.sqrt(np.sum([x * x for x in s])) ) - def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False, field=["finish_time"]): @@ -87,13 +84,20 @@ def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False, out1, out2 = open_and_compare(file1, file2) + sum_runtimes1 = sum_execution_times(out1) + sum_runtimes2 = sum_execution_times(out2) + dist = {} for f in field: dist[f] = {} + eucl = euclidean_distance(out1[f], out2[f]) if euclidean: - 
dist[f]["euclidean"] = euclidean_distance(out1[f], out2[f]) + dist[f]["euclidean"] = eucl if norm_eucl: - dist[f]["normalized_euclidean"] = normalized_euclidian_distance(out1[f], out2[f]) + if sum_runtimes1 == 0 or sum_runtimes2 == 0: + dist[f]["normalized_euclidean"] = None + else: + dist[f]["normalized_euclidean"] = eucl / np.sqrt(sum_runtimes1 * sum_runtimes2) if lateness: dist[f]["lateness"] = lateness_distance(out1[f], out2[f]) diff --git a/test/test_fb_users.py b/test/test_fb_users.py index 678029116a4ee21049849f7cc326b901a1207fb0..cb3d89744d07efaa3616ee6462d45896df928dbe 100644 --- a/test/test_fb_users.py +++ b/test/test_fb_users.py @@ -99,7 +99,8 @@ def simu_output_are_close_enough(out_dir1, out_dir2): """Returns true if the jobs.csv in both directories are epsilon-close (for the normalized euclidean distance) for each of the fields submission time, start time and finish time.""" - epsilon = 1e-6 # precision of 0.0001% + epsilon = 1e-4 # threshold under which we consider that the distance is + # negligible compared to the summed execution times dis = distances(f"{out_dir1}/_jobs.csv", f"{out_dir2}/_jobs.csv", euclidean=False, norm_eucl=True,