#!/usr/bin/env python3

"""Compute a set of distances between two batsim outputs (_jobs.csv)"""

# Provenance: reconstructed from patch c12f9c444431a95a4cf2d1ffb8fbb93583c3492f
# "new tool for computing distance between two jobs.csv files"
# by Maël Madon <mael.madon@irit.fr>, Fri, 11 Nov 2022 10:53:25 +0100
# (new file distance_batsim_output.py).

import pandas as pd


def clean_and_select(df: pd.DataFrame) -> pd.DataFrame:
    """Select only the desired columns from the dataframe and clean the job_ids.

    Args:
        df: dataframe holding at least the columns ``job_id``,
            ``submission_time``, ``starting_time`` and ``finish_time``.

    Returns:
        A new dataframe restricted to those four columns, in which ``job_id``
        is a string truncated at its first ``':'`` (the input is not modified).

    Raises:
        KeyError: if one of the desired columns is missing from ``df``.
    """
    desired_cols = ["job_id", "submission_time", "starting_time", "finish_time"]

    # Explicit copy: the columns are modified below, and writing into a bare
    # selection can trigger SettingWithCopy warnings or touch the caller's data.
    select = df.loc[:, desired_cols].copy()

    # Normalise ids to strings, then keep only what precedes the first ':'.
    job_ids = select["job_id"].astype(str)
    select["job_id"] = job_ids.str.split(":", n=1).str[0]

    return select


def open_and_compare(file1, file2):
    """Open file1 and file2, two _jobs.csv files, and check their job_ids.

    Args:
        file1: path or file-like object accepted by ``pandas.read_csv``.
        file2: path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(out1, out2)`` with the cleaned pandas DataFrame
        representation of each file (see ``clean_and_select``).

    Raises:
        ValueError: if the two files do not contain the same job_ids
            (row order is ignored for the comparison).
    """
    out1 = clean_and_select(pd.read_csv(file1))
    out2 = clean_and_select(pd.read_csv(file2))

    # Compare sorted ids so that a different row order alone is not reported
    # as a mismatch, while missing, extra or duplicated ids are.
    if sorted(out1["job_id"]) != sorted(out2["job_id"]):
        raise ValueError("job_ids of the two input files do not match")

    return out1, out2