diff --git a/distance_batsim_output.py b/distance_batsim_output.py index b8938a8bb8f3c2c4b64a82139c6d71d166b50414..8b8e0865fe26cb492f1f0121c670183112d37b17 100755 --- a/distance_batsim_output.py +++ b/distance_batsim_output.py @@ -49,8 +49,20 @@ def open_and_compare(file1, file2): out1 = out1[out1.success == 1] out2 = out2[out2.success == 1] + runtimes1 = out1.finish_time - out1.starting_time + runtimes2 = out2.finish_time - out2.starting_time + + if not np.allclose(runtimes1, runtimes2, atol=1): + warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.") + return out1, out2 +def sum_execution_times(out): + """Returns the sum of all job execution times from a clean batsim output""" + + runtimes = out.finish_time - out.starting_time + return runtimes.sum() + def euclidean_distance(s1, s2): """Returns the Euclidean distance between two series s1 and s2""" @@ -64,15 +76,6 @@ def lateness_distance(s1, s2): return float(np.sum([y-x for x, y in zip(s1, s2)])) -def normalized_euclidian_distance(s1, s2): - """Return the euclidien distance normalized by the l2 norm of the vectors, - or None if one of the vectors is the null vector (undefined)""" - - n1, n2 = l2_norm(s1), l2_norm(s2) - if n1==0 or n2==0: - return None - eucl_dist = euclidean_distance(s1, s2) - return float( eucl_dist**2 / (n1 * n2) ) def l2_norm(s): """Return the l2 norm of the series s""" @@ -87,13 +90,20 @@ def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False, out1, out2 = open_and_compare(file1, file2) + sum_runtimes1 = sum_execution_times(out1) + sum_runtimes2 = sum_execution_times(out2) + dist = {} for f in field: dist[f] = {} + eucl = euclidean_distance(out1[f], out2[f]) if euclidean: - dist[f]["euclidean"] = euclidean_distance(out1[f], out2[f]) + dist[f]["euclidean"] = eucl if norm_eucl: - dist[f]["normalized_euclidean"] = normalized_euclidian_distance(out1[f], out2[f]) + if sum_runtimes1 == 0 or sum_runtimes2 == 0: + dist[f]["normalized_euclidean"] = None + else: + dist[f]["normalized_euclidean"] = eucl / np.sqrt(sum_runtimes1 * sum_runtimes2) if lateness: dist[f]["lateness"] = lateness_distance(out1[f], out2[f]) diff --git a/example_distance.ipynb b/example_distance.ipynb index a4984069c80bf726c1c2f50979f67a965d88cdfd..6a64b40379374a0e315bcbe2b455f829e073f2fc 100644 --- a/example_distance.ipynb +++ b/example_distance.ipynb @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -179,7 +179,7 @@ "[324 rows x 5 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -250,13 +250,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "/home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:56: UserWarning: Some jobs in test/input/3jobs.csv and test/input/3jobs_w_sessions.csv don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.\n", + " warnings.warn(f\"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.\")\n", "{\n", " \"submission_time\": {\n", " \"euclidean\": 0.0,\n", @@ -270,7 +272,7 @@ " },\n", " \"finish_time\": {\n", " \"euclidean\": 28.284271247461902,\n", - " \"normalized_euclidean\": 0.08000000000000002,\n", + " \"normalized_euclidean\": 0.4040610178208843,\n", " \"lateness\": 0.0\n", " }\n", "}\n" @@ -283,27 +285,31 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "/home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:56: UserWarning: Some jobs in test/input/3jobs.csv and test/input/3jobs_zeros.csv don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.\n", + " warnings.warn(f\"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.\")\n", + "/home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:114: RuntimeWarning: divide by zero encountered in double_scalars\n", + " dist[f][\"normalized_euclidean\"] = eucl / np.sqrt(sum_runtimes1 * sum_runtimes2)\n", "{\n", " \"submission_time\": {\n", " \"euclidean\": 50.0,\n", - " \"normalized_euclidean\": null,\n", + " \"normalized_euclidean\": Infinity,\n", " \"lateness\": -70.0\n", " },\n", " \"starting_time\": {\n", " \"euclidean\": 50.0,\n", - " \"normalized_euclidean\": null,\n", + " \"normalized_euclidean\": Infinity,\n", " \"lateness\": -70.0\n", " },\n", " \"finish_time\": {\n", " \"euclidean\": 100.0,\n", - " \"normalized_euclidean\": null,\n", + " \"normalized_euclidean\": Infinity,\n", " \"lateness\": -140.0\n", " }\n", "}\n" @@ -316,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -326,17 +332,17 @@ "{\n", " \"submission_time\": {\n", " \"euclidean\": 172.1394783308001,\n", - " \"normalized_euclidean\": 2.739806304756353e-10,\n", + " \"normalized_euclidean\": 1.2942984329407551e-05,\n", " \"lateness\": 2254.0\n", " },\n", " \"starting_time\": {\n", " \"euclidean\": 172.1394783308001,\n", - " \"normalized_euclidean\": 2.739806304756353e-10,\n", + " \"normalized_euclidean\": 1.2942984329407551e-05,\n", " \"lateness\": 2254.0\n", " },\n", " \"finish_time\": {\n", " \"euclidean\": 172.1394783308001,\n", - " \"normalized_euclidean\": 2.3852456580198333e-10,\n", + " \"normalized_euclidean\": 1.2942984329407551e-05,\n", " \"lateness\": 2254.0\n", " }\n", "}\n" @@ -349,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -359,17 +365,17 @@ "{\n", " \"submission_time\": {\n", " \"euclidean\": 241.15762480170517,\n", - " \"normalized_euclidean\": 5.377222025784075e-10,\n", + " \"normalized_euclidean\": 1.813216587526566e-05,\n", " \"lateness\": 3311.0\n", " },\n", " \"starting_time\": {\n", " \"euclidean\": 241.15762480170517,\n", - " \"normalized_euclidean\": 5.377222025784075e-10,\n", + " \"normalized_euclidean\": 1.813216587526566e-05,\n", " \"lateness\": 3311.0\n", " },\n", " \"finish_time\": {\n", " \"euclidean\": 255.14897609044016,\n", - " \"normalized_euclidean\": 5.240304046302593e-10,\n", + " \"normalized_euclidean\": 1.91841479662116e-05,\n", " \"lateness\": 3633.0\n", " }\n", "}\n"