#!/usr/bin/env python3

"""Compute a set of distances between two batsim outputs (_jobs.csv)."""

import pandas as pd


def clean_and_select(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* restricted to the columns of interest.

    The result keeps only ``job_id``, ``submission_time``, ``starting_time``
    and ``finish_time``. Every ``job_id`` is converted to ``str`` and
    truncated at its first ``':'`` (the part before the colon is kept).

    Args:
        df: DataFrame holding at least the four columns listed above.

    Returns:
        A new DataFrame; *df* itself is never modified.

    Raises:
        KeyError: if one of the required columns is missing from *df*.
    """
    desired_cols = ["job_id", "submission_time", "starting_time", "finish_time"]

    # Explicit copy: the assignment below must neither touch the caller's
    # frame nor trigger pandas' SettingWithCopyWarning.
    selected = df.loc[:, desired_cols].copy()

    # `.str.split(...).str[0]` is used instead of `expand=True` followed by
    # `[0]`: the expanded form has no column 0 when the frame is empty and
    # would raise KeyError.
    job_ids = selected["job_id"].astype(str)
    selected["job_id"] = job_ids.str.split(":", n=1).str[0]

    return selected


def open_and_compare(file1, file2) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Open two ``_jobs.csv`` files and check that their job_ids match.

    Both files are read with ``pandas.read_csv`` and passed through
    ``clean_and_select``; the job_ids are compared after that cleaning,
    as sets (row order is irrelevant).

    Args:
        file1: path or file-like object of the first ``_jobs.csv``.
        file2: path or file-like object of the second ``_jobs.csv``.

    Returns:
        A ``(out1, out2)`` tuple with the cleaned DataFrame of each file.

    Raises:
        ValueError: if the two files do not contain the same job_ids.
        KeyError: if a required column is missing from either file.
    """
    out1 = clean_and_select(pd.read_csv(file1))
    out2 = clean_and_select(pd.read_csv(file2))

    mismatched = set(out1["job_id"]) ^ set(out2["job_id"])
    if mismatched:
        raise ValueError(
            f"job_ids differ between the two outputs: {sorted(mismatched)}"
        )

    return out1, out2