Skip to content
Snippets Groups Projects
Commit 28485c9f authored by Maël Madon's avatar Maël Madon
Browse files

test: change the way to calculate the normalized euclidean distance to see if...

test: change the way to calculate the normalized euclidean distance to see if two outputs are close enough
parent 7c86cbe4
No related branches found
No related tags found
No related merge requests found
Pipeline #4620 passed
......@@ -49,8 +49,20 @@ def open_and_compare(file1, file2):
out1 = out1[out1.success == 1]
out2 = out2[out2.success == 1]
runtimes1 = out1.finish_time - out1.starting_time
runtimes2 = out2.finish_time - out2.starting_time
if not np.allclose(runtimes1, runtimes2, atol=1):
warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.")
return out1, out2
def sum_execution_times(out):
    """Return the total execution time over all jobs of a clean batsim output.

    The execution time of a job is its finish_time minus its starting_time;
    the per-job values are added up with the ``sum`` method of the result."""
    return (out.finish_time - out.starting_time).sum()
def euclidean_distance(s1, s2):
"""Returns the Euclidean distance between two series s1 and s2"""
......@@ -64,21 +76,6 @@ def lateness_distance(s1, s2):
return float(np.sum([y-x for x, y in zip(s1, s2)]))
def normalized_euclidian_distance(s1, s2):
    """Return the squared Euclidean distance between s1 and s2 divided by the
    product of their l2 norms.

    The ratio is undefined when either series is the null vector (l2 norm
    equal to 0); None is returned in that case."""
    norm1 = l2_norm(s1)
    norm2 = l2_norm(s2)
    # Guard the division below: a zero norm would make the denominator 0.
    if 0 in (norm1, norm2):
        return None
    squared_dist = euclidean_distance(s1, s2) ** 2
    return float(squared_dist / (norm1 * norm2))
def l2_norm(s):
    """Return the l2 (Euclidean) norm of the series s as a Python float.

    The values are converted to floating point before being squared, so
    large integer inputs cannot wrap around on overflow, and the whole
    computation runs vectorized inside numpy instead of squaring the
    elements one by one in a Python loop. An empty series has norm 0.0."""
    values = np.asarray(s, dtype=float)
    return float(np.linalg.norm(values))
def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
field=["finish_time"]):
......@@ -87,13 +84,20 @@ def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
out1, out2 = open_and_compare(file1, file2)
sum_runtimes1 = sum_execution_times(out1)
sum_runtimes2 = sum_execution_times(out2)
dist = {}
for f in field:
dist[f] = {}
eucl = euclidean_distance(out1[f], out2[f])
if euclidean:
dist[f]["euclidean"] = euclidean_distance(out1[f], out2[f])
dist[f]["euclidean"] = eucl
if norm_eucl:
dist[f]["normalized_euclidean"] = normalized_euclidian_distance(out1[f], out2[f])
if sum_runtimes1 == 0 or sum_runtimes2 == 0:
dist[f]["normalized_euclidean"] = None
else:
dist[f]["normalized_euclidean"] = eucl / np.sqrt(sum_runtimes1 * sum_runtimes2)
if lateness:
dist[f]["lateness"] = lateness_distance(out1[f], out2[f])
......
......@@ -99,7 +99,8 @@ def simu_output_are_close_enough(out_dir1, out_dir2):
"""Returns true if the jobs.csv in both directories are epsilon-close (for
the normalized euclidean distance) for each of the fields submission time,
start time and finish time."""
epsilon = 1e-6 # precision of 0.0001%
epsilon = 1e-4 # threshold under which we consider that the distance is
# negligible compared to the summed execution times
dis = distances(f"{out_dir1}/_jobs.csv", f"{out_dir2}/_jobs.csv",
euclidean=False, norm_eucl=True,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment