Skip to content
Snippets Groups Projects
Commit c2807a15 authored by Maël Madon's avatar Maël Madon
Browse files

add a 'normalized Euclidean' distance to the set of distances

parent 221240c1
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@ import numpy as np
import argparse
import json
import warnings
from scipy.stats import entropy
def clean_and_select(df):
......@@ -64,19 +65,32 @@ def lateness_distance(s1, s2):
return np.sum([y-x for x, y in zip(s1, s2)])
def normalized_euclidian_distance(s1, s2):
    """Return the squared Euclidean distance between s1 and s2, normalized
    by the product of their l2 norms.

    Computes ``euclidean_distance(s1, s2)**2 / (l2_norm(s1) * l2_norm(s2))``.

    NOTE(review): no guard is applied when either series has a zero l2
    norm; the denominator is then zero and the result is presumably
    inf/nan (NumPy float division) -- confirm this is acceptable for
    callers.
    """
    eucl_dist = euclidean_distance(s1, s2)
    return eucl_dist**2 / (l2_norm(s1) * l2_norm(s2))
def l2_norm(s):
    """Return the l2 (Euclidean) norm of the series s.

    Args:
        s: Iterable of numbers (list, NumPy array, pandas Series, ...).

    Returns:
        The square root of the sum of the squared elements, as a NumPy
        float. An empty input yields 0.0.
    """
    # One-shot iterables (e.g. generators) have no length, so materialize
    # them first; sized containers are handed to NumPy directly.
    values = np.asarray(s if hasattr(s, "__len__") else list(s), dtype=float)
    # Vectorized square-and-sum instead of a Python-level loop per element.
    return np.sqrt(np.sum(values * values))
def distances(file1, file2, euclidean=True, lateness=False, norm_eucl=False,
field=["finish_time"]):
"""Computes and returns a set of distances between two batsim outputs, if
they have the same job_ids."""
out1, out2 = open_and_compare(file1, file2)
dist = {}
dist = {}
for f in field:
dist[f] = {}
if euclidean:
if euclidean:
dist[f]["euclidean"] = euclidean_distance(out1[f], out2[f])
if norm_eucl:
dist[f]["normalized_euclidean"] = normalized_euclidian_distance(out1[f], out2[f])
if lateness:
dist[f]["lateness"] = lateness_distance(out1[f], out2[f])
......@@ -108,7 +122,7 @@ def main():
parser.add_argument("--type", nargs='+', default=['euclidean'],
help="Type of distance to use. Available values are "
"{euclidean, lateness}")
"{euclidean, lateness, normalized_euclidean}")
parser.add_argument("--field", nargs='+', default=['finish_time'],
help="The field to use to compute the distance. "
"Available values are {submission_time, starting_time, "
......@@ -119,12 +133,13 @@ def main():
args = parser.parse_args()
if args.all:
args.type = ["euclidean", "lateness"]
args.type = ["euclidean", "lateness", "normalized_euclidean"]
args.field = ["submission_time", "starting_time", "finish_time"]
dist = distances(file1=args.file1, file2=args.file2,
euclidean="euclidean" in args.type,
lateness="lateness" in args.type,
norm_eucl="normalized_euclidean" in args.type,
field=list(args.field))
pretty_print(dist)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment