Skip to content
Snippets Groups Projects
Commit fdf447cb authored by Maël Madon's avatar Maël Madon
Browse files

adding a notebook for example distances

parent 5e6bb738
No related branches found
No related tags found
No related merge requests found
...@@ -57,13 +57,13 @@ def euclidean_distance(s1, s2): ...@@ -57,13 +57,13 @@ def euclidean_distance(s1, s2):
"""Returns the Euclidean distance between two series s1 and s2""" """Returns the Euclidean distance between two series s1 and s2"""
dist = np.sqrt(np.sum([(x-y) * (x-y) for x, y in zip(s1, s2)])) dist = np.sqrt(np.sum([(x-y) * (x-y) for x, y in zip(s1, s2)]))
return dist return float(dist)
def lateness_distance(s1, s2):
    """Return the 'lateness' of s2 compared to s1.

    The lateness is the sum of the differences ``y - x`` over the pairs
    ``(x, y)`` produced by ``zip(s1, s2)``. Because ``zip`` stops at the
    shorter input, surplus elements of the longer series are ignored.

    The sum is converted to a built-in ``float`` so callers always get a
    plain Python number instead of a NumPy scalar (NumPy integer scalars,
    for instance, cannot be serialized by ``json.dumps``).
    """
    return float(np.sum([y - x for x, y in zip(s1, s2)]))
def normalized_euclidian_distance(s1, s2): def normalized_euclidian_distance(s1, s2):
"""Return the euclidien distance normalized by the l2 norm of the vectors, """Return the euclidien distance normalized by the l2 norm of the vectors,
...@@ -73,12 +73,12 @@ def normalized_euclidian_distance(s1, s2): ...@@ -73,12 +73,12 @@ def normalized_euclidian_distance(s1, s2):
if n1==0 or n2==0: if n1==0 or n2==0:
return None return None
eucl_dist = euclidean_distance(s1, s2) eucl_dist = euclidean_distance(s1, s2)
return eucl_dist**2 / (n1 * n2) return float( eucl_dist**2 / (n1 * n2) )
def l2_norm(s):
    """Return the l2 norm of the series s.

    The norm is the square root of the sum of the squared elements of
    ``s``. The result is converted to a built-in ``float`` so callers get
    a plain Python number rather than a NumPy scalar.
    """
    return float(np.sqrt(np.sum([x * x for x in s])))
def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False, def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
...@@ -111,7 +111,7 @@ def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False, ...@@ -111,7 +111,7 @@ def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
def pretty_print(dist): def pretty_print(dist):
"""Nice printing of the dictionnary dist""" """Nice printing of the dictionnary dist"""
if type(dist) is dict: if isinstance(dist, dict):
pretty = json.dumps(dist, indent=4) pretty = json.dumps(dist, indent=4)
print(pretty) print(pretty)
else: else:
......
job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata
1216,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,362.000000,80,0.000000,362.000000,1.000000,2,62671.250000, 1216,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,50,80,0.000000,362.000000,1.000000,2,62671.250000,
247,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,57102.000000,0,0.000000,57102.000000,1.000000,0,12391134.000000, 247,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,0,0,0.000000,57102.000000,1.000000,0,12391134.000000,
1242,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,9620.000000,60,0.000000,9620.000000,1.000000,2,1665462.500000, 1242,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,20,60,0.000000,9620.000000,1.000000,1,1665462.500000,
\ No newline at end of file \ No newline at end of file
job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata job_id,workload_name,profile,submission_time,requested_number_of_resources,requested_time,success,final_state,starting_time,execution_time,finish_time,waiting_time,turnaround_time,stretch,allocated_resources,consumed_energy,metadata
1216:s1,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,362.000000,60,0.000000,362.000000,1.000000,2,62671.250000, 1216:s1,user11,362,30,1,86400.000000,1,COMPLETED_SUCCESSFULLY,30,30,60,0.000000,362.000000,1.000000,2,62671.250000,
247:s1,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,57102.000000,0,0.000000,57102.000000,1.000000,0,12391134.000000, 247:s1,user5,57102,0,8,432000.000000,1,COMPLETED_SUCCESSFULLY,0,0,0,0.000000,57102.000000,1.000000,0,12391134.000000,
1242:s1,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,9620.000000,80,0.000000,9620.000000,1.000000,2,1665462.500000, 1242:s1,user11,9620,40,1,86400.000000,1,COMPLETED_SUCCESSFULLY,40,40,80,0.000000,9620.000000,1.000000,1,1665462.500000,
\ No newline at end of file \ No newline at end of file
%% Cell type:markdown id: tags:
# Example of distances
This notebook calculates distances between several _jobs.csv files as an example.
%% Cell type:code id: tags:
``` python
# Initialization
import pandas as pd
from evalys.jobset import JobSet
from evalys.visu.gantt import plot_gantt
# Paths of the CSV input files referenced by the cells of this notebook
three_jobs = "test/input/3jobs.csv"
three_jobs_w_session = "test/input/3jobs_w_sessions.csv"
three_jobs_zero = "test/input/3jobs_zeros.csv"
mc_10days_a60 = "test/input/mc_10days_a60_jobs.csv"
mc_10days_m60 = "test/input/mc_10days_m60_jobs.csv"
mc_10days_rigid = "test/input/mc_10days_rigid_jobs.csv"
```
%% Cell type:markdown id: tags:
Visualize the useful columns of a jobs.csv:
%% Cell type:code id: tags:
``` python
out1 = pd.read_csv(mc_10days_a60)
# Select: keep every row but only the columns listed in desired_cols
desired_cols = ["job_id", "submission_time", "starting_time", "finish_time", "success"]
select = out1.loc[:, desired_cols]
# Clean job_id: cast to str, keep only the part before the first ':',
# then cast the result to int
select.job_id = select.job_id.astype(str)
select.job_id = select.job_id.str.split(':', expand=True)[0]
select.job_id = select.job_id.astype(int)
# Last expression of the cell, so the notebook displays the table
select
```
%% Output
job_id submission_time starting_time finish_time success
0 1216 55532.0 55532.0 55894.0 1
1 247 9327.0 9327.0 66429.0 1
2 1242 56876.0 56876.0 66496.0 1
3 1434 66504.0 66504.0 67496.0 1
4 1438 66506.0 66506.0 69764.0 1
.. ... ... ... ... ...
319 18943 643942.0 643942.0 954600.0 1
320 18945 643943.0 643943.0 958295.0 1
321 21862 753752.0 753752.0 960982.0 1
322 18944 643942.0 643942.0 961174.0 1
323 21995 760446.0 760446.0 1031099.0 1
[324 rows x 5 columns]
%% Cell type:markdown id: tags:
## With mock files
Visualize our two mock files 3jobs and 3jobs_w_sessions:
%% Cell type:code id: tags:
``` python
plot_gantt(JobSet.from_csv(three_jobs), title="three_jobs")
plot_gantt(JobSet.from_csv(three_jobs_w_session), title="three_jobs_w_session")
```
%% Output
/home/mael/.local/lib/python3.10/site-packages/evalys/visu/core.py:62: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure.
self.fig.show()
/home/mael/.local/lib/python3.10/site-packages/evalys/visu/core.py:62: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure.
self.fig.show()
%% Cell type:code id: tags:
``` python
!python3 distance_batsim_output.py {three_jobs} {three_jobs_w_session} --all
```
%% Output
{
"submission_time": {
"euclidean": 0.0,
"normalized_euclidean": 0.0,
"lateness": 0.0
},
"starting_time": {
"euclidean": 0.0,
"normalized_euclidean": 0.0,
"lateness": 0.0
},
"finish_time": {
"euclidean": 28.284271247461902,
"normalized_euclidean": 0.08000000000000002,
"lateness": 0.0
}
}
%% Cell type:code id: tags:
``` python
!python3 distance_batsim_output.py {three_jobs} {three_jobs_zero} --all
```
%% Output
{
"submission_time": {
"euclidean": 50.0,
"normalized_euclidean": null,
"lateness": -70.0
},
"starting_time": {
"euclidean": 50.0,
"normalized_euclidean": null,
"lateness": -70.0
},
"finish_time": {
"euclidean": 100.0,
"normalized_euclidean": null,
"lateness": -140.0
}
}
%% Cell type:code id: tags:
``` python
!python3 distance_batsim_output.py {mc_10days_a60} {mc_10days_m60} --all
```
%% Output
{
"submission_time": {
"euclidean": 172.1394783308001,
"normalized_euclidean": 2.739806304756353e-10,
"lateness": 2254.0
},
"starting_time": {
"euclidean": 172.1394783308001,
"normalized_euclidean": 2.739806304756353e-10,
"lateness": 2254.0
},
"finish_time": {
"euclidean": 172.1394783308001,
"normalized_euclidean": 2.3852456580198333e-10,
"lateness": 2254.0
}
}
%% Cell type:code id: tags:
``` python
!python3 distance_batsim_output.py {mc_10days_a60} {mc_10days_rigid} --all
```
%% Output
{
"submission_time": {
"euclidean": 241.15762480170517,
"normalized_euclidean": 5.377222025784075e-10,
"lateness": 3311.0
},
"starting_time": {
"euclidean": 241.15762480170517,
"normalized_euclidean": 5.377222025784075e-10,
"lateness": 3311.0
},
"finish_time": {
"euclidean": 255.14897609044016,
"normalized_euclidean": 5.240304046302593e-10,
"lateness": 3633.0
}
}
%% Cell type:code id: tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment