From a8db6c863210f1dfdf5073abf7cc686a63d0a087 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr>
Date: Wed, 16 Nov 2022 14:26:17 +0100
Subject: [PATCH] use job_id as index in the pandas dataframes

---
 distance_batsim_output.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/distance_batsim_output.py b/distance_batsim_output.py
index b5e324e..3f32433 100755
--- a/distance_batsim_output.py
+++ b/distance_batsim_output.py
@@ -21,8 +21,9 @@ def clean_and_select(df):
     select.job_id = select.job_id.astype(str)
     select["job_id"] = select["job_id"].str.split(':', expand=True)[0]
     select.job_id = select.job_id.astype(int)
+    select = select.sort_values(by="job_id")  # sort_values returns a new frame
 
-    return select.sort_values(by="job_id")
+    return select.set_index("job_id")
 
 
 def open_and_compare(file1, file2):
@@ -35,7 +36,7 @@ def open_and_compare(file1, file2):
     out1 = clean_and_select(out1)
     out2 = clean_and_select(out2)
 
-    if not out1.job_id.equals(out2.job_id):
+    if not out1.index.equals(out2.index):
         raise KeyError(
             f"{file1} and {file2} cannot be compared: they don't have the same job_ids")
 
-- 
GitLab