Skip to content
Snippets Groups Projects
Commit 6ef4c361 authored by Maël Madon's avatar Maël Madon
Browse files

test new distance and clean unused code

parent d216708b
Branches
No related tags found
No related merge requests found
...@@ -77,12 +77,6 @@ def lateness_distance(s1, s2): ...@@ -77,12 +77,6 @@ def lateness_distance(s1, s2):
return float(np.sum([y-x for x, y in zip(s1, s2)])) return float(np.sum([y-x for x, y in zip(s1, s2)]))
def l2_norm(s):
"""Return the l2 norm of the series s"""
return float( np.sqrt(np.sum([x * x for x in s])) )
def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False, def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
field=["finish_time"]): field=["finish_time"]):
"""Computes and returns a set of distances between two batsim outputs, if """Computes and returns a set of distances between two batsim outputs, if
......
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Example of distances # Example of distances
This notebook calculates distances between several _jobs.csv files as an example. This notebook calculates distances between several _jobs.csv files as an example.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Initialization # Initialization
import pandas as pd import pandas as pd
from evalys.jobset import JobSet from evalys.jobset import JobSet
from evalys.visu.gantt import plot_gantt from evalys.visu.gantt import plot_gantt
three_jobs = "test/input/3jobs.csv" three_jobs = "test/input/3jobs.csv"
three_jobs_w_session = "test/input/3jobs_w_sessions.csv" three_jobs_w_session = "test/input/3jobs_w_sessions.csv"
three_jobs_zero = "test/input/3jobs_zeros.csv" three_jobs_zero = "test/input/3jobs_zeros.csv"
mc_10days_a60 = "test/input/mc_10days_a60_jobs.csv" mc_10days_a60 = "test/input/mc_10days_a60_jobs.csv"
mc_10days_m60 = "test/input/mc_10days_m60_jobs.csv" mc_10days_m60 = "test/input/mc_10days_m60_jobs.csv"
mc_10days_rigid = "test/input/mc_10days_rigid_jobs.csv" mc_10days_rigid = "test/input/mc_10days_rigid_jobs.csv"
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Visualize the useful columns of a jobs.csv: Visualize the useful columns of a jobs.csv:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
out1 = pd.read_csv(mc_10days_a60) out1 = pd.read_csv(mc_10days_a60)
# Select # Select
desired_cols = ["job_id", "submission_time", "starting_time", "finish_time", "success"] desired_cols = ["job_id", "submission_time", "starting_time", "finish_time", "success"]
select = out1.loc[:, desired_cols] select = out1.loc[:, desired_cols]
# Clean job_id # Clean job_id
select.job_id = select.job_id.astype(str) select.job_id = select.job_id.astype(str)
select.job_id = select.job_id.str.split(':', expand=True)[0] select.job_id = select.job_id.str.split(':', expand=True)[0]
select.job_id = select.job_id.astype(int) select.job_id = select.job_id.astype(int)
select select
``` ```
%% Output %% Output
job_id submission_time starting_time finish_time success job_id submission_time starting_time finish_time success
0 1216 55532.0 55532.0 55894.0 1 0 1216 55532.0 55532.0 55894.0 1
1 247 9327.0 9327.0 66429.0 1 1 247 9327.0 9327.0 66429.0 1
2 1242 56876.0 56876.0 66496.0 1 2 1242 56876.0 56876.0 66496.0 1
3 1434 66504.0 66504.0 67496.0 1 3 1434 66504.0 66504.0 67496.0 1
4 1438 66506.0 66506.0 69764.0 1 4 1438 66506.0 66506.0 69764.0 1
.. ... ... ... ... ... .. ... ... ... ... ...
319 18943 643942.0 643942.0 954600.0 1 319 18943 643942.0 643942.0 954600.0 1
320 18945 643943.0 643943.0 958295.0 1 320 18945 643943.0 643943.0 958295.0 1
321 21862 753752.0 753752.0 960982.0 1 321 21862 753752.0 753752.0 960982.0 1
322 18944 643942.0 643942.0 961174.0 1 322 18944 643942.0 643942.0 961174.0 1
323 21995 760446.0 760446.0 1031099.0 1 323 21995 760446.0 760446.0 1031099.0 1
[324 rows x 5 columns] [324 rows x 5 columns]
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## With mock files ## With mock files
Visualize our two mock files 3jobs and 3jobs_w_sessions: Visualize our two mock files 3jobs and 3jobs_w_sessions:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
plot_gantt(JobSet.from_csv(three_jobs), title="three_jobs") plot_gantt(JobSet.from_csv(three_jobs), title="three_jobs")
plot_gantt(JobSet.from_csv(three_jobs_w_session), title="three_jobs_w_session") plot_gantt(JobSet.from_csv(three_jobs_w_session), title="three_jobs_w_session")
``` ```
%% Output %% Output
/home/mael/.local/lib/python3.10/site-packages/evalys/visu/core.py:62: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure. /home/mael/.local/lib/python3.10/site-packages/evalys/visu/core.py:62: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure.
self.fig.show() self.fig.show()
/home/mael/.local/lib/python3.10/site-packages/evalys/visu/core.py:62: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure. /home/mael/.local/lib/python3.10/site-packages/evalys/visu/core.py:62: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure.
self.fig.show() self.fig.show()
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
!python3 distance_batsim_output.py {three_jobs} {three_jobs_w_session} --all !python3 distance_batsim_output.py {three_jobs} {three_jobs_w_session} --all
``` ```
%% Output %% Output
/home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:56: UserWarning: Some jobs in test/input/3jobs.csv and test/input/3jobs_w_sessions.csv don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation. /home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:56: UserWarning: Some jobs in test/input/3jobs.csv and test/input/3jobs_w_sessions.csv don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.
warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.") warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.")
{ {
"submission_time": { "submission_time": {
"euclidean": 0.0, "euclidean": 0.0,
"normalized_euclidean": 0.0, "normalized_euclidean": 0.0,
"lateness": 0.0 "lateness": 0.0
}, },
"starting_time": { "starting_time": {
"euclidean": 0.0, "euclidean": 0.0,
"normalized_euclidean": 0.0, "normalized_euclidean": 0.0,
"lateness": 0.0 "lateness": 0.0
}, },
"finish_time": { "finish_time": {
"euclidean": 28.284271247461902, "euclidean": 28.284271247461902,
"normalized_euclidean": 0.4040610178208843, "normalized_euclidean": 0.4040610178208843,
"lateness": 0.0 "lateness": 0.0
} }
} }
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
!python3 distance_batsim_output.py {three_jobs} {three_jobs_zero} --all !python3 distance_batsim_output.py {three_jobs} {three_jobs_zero} --all
``` ```
%% Output %% Output
/home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:56: UserWarning: Some jobs in test/input/3jobs.csv and test/input/3jobs_zeros.csv don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation. /home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:56: UserWarning: Some jobs in test/input/3jobs.csv and test/input/3jobs_zeros.csv don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.
warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.") warnings.warn(f"Some jobs in {file1} and {file2} don't have the same runtime (+/- 1sec). It is unusual, as runtime are normally an input of the simulation.")
/home/mael/ownCloud/workspace/batmen-tools/distance_batsim_output.py:114: RuntimeWarning: divide by zero encountered in double_scalars
dist[f]["normalized_euclidean"] = eucl / np.sqrt(sum_runtimes1 * sum_runtimes2)
{ {
"submission_time": { "submission_time": {
"euclidean": 50.0, "euclidean": 50.0,
"normalized_euclidean": Infinity, "normalized_euclidean": null,
"lateness": -70.0 "lateness": -70.0
}, },
"starting_time": { "starting_time": {
"euclidean": 50.0, "euclidean": 50.0,
"normalized_euclidean": Infinity, "normalized_euclidean": null,
"lateness": -70.0 "lateness": -70.0
}, },
"finish_time": { "finish_time": {
"euclidean": 100.0, "euclidean": 100.0,
"normalized_euclidean": Infinity, "normalized_euclidean": null,
"lateness": -140.0 "lateness": -140.0
} }
} }
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
!python3 distance_batsim_output.py {mc_10days_a60} {mc_10days_m60} --all !python3 distance_batsim_output.py {mc_10days_a60} {mc_10days_m60} --all
``` ```
%% Output %% Output
{ {
"submission_time": { "submission_time": {
"euclidean": 172.1394783308001, "euclidean": 172.1394783308001,
"normalized_euclidean": 1.2942984329407551e-05, "normalized_euclidean": 1.2942984329407551e-05,
"lateness": 2254.0 "lateness": 2254.0
}, },
"starting_time": { "starting_time": {
"euclidean": 172.1394783308001, "euclidean": 172.1394783308001,
"normalized_euclidean": 1.2942984329407551e-05, "normalized_euclidean": 1.2942984329407551e-05,
"lateness": 2254.0 "lateness": 2254.0
}, },
"finish_time": { "finish_time": {
"euclidean": 172.1394783308001, "euclidean": 172.1394783308001,
"normalized_euclidean": 1.2942984329407551e-05, "normalized_euclidean": 1.2942984329407551e-05,
"lateness": 2254.0 "lateness": 2254.0
} }
} }
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
!python3 distance_batsim_output.py {mc_10days_a60} {mc_10days_rigid} --all !python3 distance_batsim_output.py {mc_10days_a60} {mc_10days_rigid} --all
``` ```
%% Output %% Output
{ {
"submission_time": { "submission_time": {
"euclidean": 241.15762480170517, "euclidean": 241.15762480170517,
"normalized_euclidean": 1.813216587526566e-05, "normalized_euclidean": 1.813216587526566e-05,
"lateness": 3311.0 "lateness": 3311.0
}, },
"starting_time": { "starting_time": {
"euclidean": 241.15762480170517, "euclidean": 241.15762480170517,
"normalized_euclidean": 1.813216587526566e-05, "normalized_euclidean": 1.813216587526566e-05,
"lateness": 3311.0 "lateness": 3311.0
}, },
"finish_time": { "finish_time": {
"euclidean": 255.14897609044016, "euclidean": 255.14897609044016,
"normalized_euclidean": 1.91841479662116e-05, "normalized_euclidean": 1.91841479662116e-05,
"lateness": 3633.0 "lateness": 3633.0
} }
} }
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
...@@ -19,17 +19,6 @@ def test_lateness_distance(): ...@@ -19,17 +19,6 @@ def test_lateness_distance():
assert lateness_distance(s1, s2) == - lateness_distance(s2, s1) == 6 assert lateness_distance(s1, s2) == - lateness_distance(s2, s1) == 6
assert lateness_distance(empty, empty) == 0 assert lateness_distance(empty, empty) == 0
def test_l2_norm():
assert l2_norm(empty) == 0
assert l2_norm(s1) == np.sqrt(10*10 + 14*14 + 500*500)
assert l2_norm(u) == 2
assert l2_norm(v) == 1
def test_normalized_euclidean_distance():
assert normalized_euclidian_distance(u, v) == .5
####### Integration tests ####### ####### Integration tests #######
three_jobs = "test/input/3jobs.csv" three_jobs = "test/input/3jobs.csv"
...@@ -40,7 +29,8 @@ mc_10days_a60 = "test/input/mc_10days_a60_jobs.csv" ...@@ -40,7 +29,8 @@ mc_10days_a60 = "test/input/mc_10days_a60_jobs.csv"
def test_cleaning(): def test_cleaning():
# Clean session info: # Clean session info:
distances(three_jobs_w_session, three_jobs_zero) with pytest.warns(UserWarning):
distances(three_jobs_w_session, three_jobs_zero)
# Clean unsuccessful jobs: # Clean unsuccessful jobs:
with pytest.warns(UserWarning): with pytest.warns(UserWarning):
...@@ -63,24 +53,26 @@ def test_some_distances(): ...@@ -63,24 +53,26 @@ def test_some_distances():
euclidean=False, lateness=True, field=fin) == 0 euclidean=False, lateness=True, field=fin) == 0
# Eucl distance # Eucl distance
assert distances(three_jobs, three_jobs_zero, field=sub) == 50 with pytest.warns(UserWarning):
assert distances(three_jobs_zero, three_jobs, field=start) == 50 assert distances(three_jobs, three_jobs_zero, field=sub) == 50
assert distances(three_jobs, three_jobs_zero, field=fin) == 100 assert distances(three_jobs_zero, three_jobs, field=start) == 50
assert distances(three_jobs, three_jobs_zero, field=fin) == 100
assert distances(three_jobs, three_jobs_w_session, field=sub) == 0 assert distances(three_jobs, three_jobs_w_session, field=sub) == 0
assert distances(three_jobs, three_jobs_w_session, field=start) == 0 assert distances(three_jobs, three_jobs_w_session, field=start) == 0
assert distances(three_jobs, three_jobs_w_session, field=fin) == 20 * np.sqrt(2) assert distances(three_jobs, three_jobs_w_session, field=fin) == 20 * np.sqrt(2)
# Normalized eucl distance # Normalized eucl distance
assert distances(three_jobs, three_jobs_zero, with pytest.warns(UserWarning):
euclidean=False, norm_eucl=True, field=sub) == None assert distances(three_jobs, three_jobs_zero,
assert distances(three_jobs, three_jobs_w_session, euclidean=False, norm_eucl=True, field=sub) == None
euclidean=False, norm_eucl=True, field=sub) == 0 assert distances(three_jobs, three_jobs_w_session,
euclidean=False, norm_eucl=True, field=sub) == 0
norm_dis_A_B = distances(three_jobs, three_jobs_w_session,
euclidean=False, norm_eucl=True, field=fin) norm_dis_A_B = distances(three_jobs, three_jobs_w_session,
norm_dis_B_A = distances(three_jobs, three_jobs_w_session, euclidean=False, norm_eucl=True, field=fin)
norm_dis_B_A = distances(three_jobs, three_jobs_w_session,
euclidean=False, norm_eucl=True, field=fin) euclidean=False, norm_eucl=True, field=fin)
expected = 800 / (100*100) expected = 20 * np.sqrt(2) / 70 # eucl / sum_runtime
assert norm_dis_A_B == norm_dis_B_A assert norm_dis_A_B == norm_dis_B_A
assert norm_dis_B_A - expected < 1e-8 assert norm_dis_B_A - expected < 1e-8
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment