Skip to content
Snippets Groups Projects
Commit c2807a15 authored by Maël Madon's avatar Maël Madon
Browse files

add a 'normalized Euclidean' distance to the set of distances

parent 221240c1
Branches
No related tags found
No related merge requests found
...@@ -7,6 +7,7 @@ import numpy as np ...@@ -7,6 +7,7 @@ import numpy as np
import argparse import argparse
import json import json
import warnings import warnings
from scipy.stats import entropy
def clean_and_select(df): def clean_and_select(df):
...@@ -64,19 +65,32 @@ def lateness_distance(s1, s2): ...@@ -64,19 +65,32 @@ def lateness_distance(s1, s2):
return np.sum([y-x for x, y in zip(s1, s2)]) return np.sum([y-x for x, y in zip(s1, s2)])
def normalized_euclidian_distance(s1, s2):
"""Return the square of euclidean_distance(s1, s2) divided by the product of the l2 norms of s1 and s2."""
def distances(file1, file2, euclidean=True, lateness=False, eucl_dist = euclidean_distance(s1, s2)
return eucl_dist**2 / (l2_norm(s1) * l2_norm(s2))
def l2_norm(s):
    """Return the l2 (Euclidean) norm of the series s.

    The values are converted to a float array before squaring, so the
    squares are computed in floating point in a single vectorized pass;
    this avoids the silent wrap-around that squaring large fixed-width
    integers element by element can produce.

    Args:
        s: an iterable of numbers (list, numpy array, pandas Series,
            generator, ...).

    Returns:
        The square root of the sum of the squared values (0.0 for an
        empty input).
    """
    # One-shot iterables (e.g. generators) have no length, so numpy cannot
    # size an array from them directly: materialize them first.
    values = np.asarray(s if hasattr(s, "__len__") else list(s), dtype=float)
    return np.sqrt(np.sum(np.square(values)))
def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
field=["finish_time"]): field=["finish_time"]):
"""Computes and returns a set of distances between two batsim outputs, if """Computes and returns a set of distances between two batsim outputs, if
they have the same job_ids.""" they have the same job_ids."""
out1, out2 = open_and_compare(file1, file2) out1, out2 = open_and_compare(file1, file2)
dist = {}
dist = {}
for f in field: for f in field:
dist[f] = {} dist[f] = {}
if euclidean: if euclidean:
dist[f]["euclidean"] = euclidean_distance(out1[f], out2[f]) dist[f]["euclidean"] = euclidean_distance(out1[f], out2[f])
if norm_eucl:
dist[f]["normalized_euclidean"] = normalized_euclidian_distance(out1[f], out2[f])
if lateness: if lateness:
dist[f]["lateness"] = lateness_distance(out1[f], out2[f]) dist[f]["lateness"] = lateness_distance(out1[f], out2[f])
...@@ -108,7 +122,7 @@ def main(): ...@@ -108,7 +122,7 @@ def main():
parser.add_argument("--type", nargs='+', default=['euclidean'], parser.add_argument("--type", nargs='+', default=['euclidean'],
help="Type of distance to use. Available values are " help="Type of distance to use. Available values are "
"{euclidean, lateness}") "{euclidean, lateness, normalized_euclidean}")
parser.add_argument("--field", nargs='+', default=['finish_time'], parser.add_argument("--field", nargs='+', default=['finish_time'],
help="The field to use to compute the distance. " help="The field to use to compute the distance. "
"Available values are {submission_time, starting_time, " "Available values are {submission_time, starting_time, "
...@@ -119,12 +133,13 @@ def main(): ...@@ -119,12 +133,13 @@ def main():
args = parser.parse_args() args = parser.parse_args()
if args.all: if args.all:
args.type = ["euclidean", "lateness"] args.type = ["euclidean", "lateness", "normalized_euclidean"]
args.field = ["submission_time", "starting_time", "finish_time"] args.field = ["submission_time", "starting_time", "finish_time"]
dist = distances(file1=args.file1, file2=args.file2, dist = distances(file1=args.file1, file2=args.file2,
euclidean="euclidean" in args.type, euclidean="euclidean" in args.type,
lateness="lateness" in args.type, lateness="lateness" in args.type,
norm_eucl="normalized_euclidean" in args.type,
field=list(args.field)) field=list(args.field))
pretty_print(dist) pretty_print(dist)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment