diff --git a/distance_batsim_output.py b/distance_batsim_output.py
new file mode 100644
index 0000000000000000000000000000000000000000..883ad187135ee30b36dab59d8e2254c835607fc8
--- /dev/null
+++ b/distance_batsim_output.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+"""Compute a set of distances between two batsim outputs (_jobs.csv)"""
+
+import pandas as pd
+
+def clean_and_select(df):
+    """Return a new dataframe with only the job_id and timing columns of df,
+    where each job_id is cast to str and cut at its first ':'."""
+    desired_cols = ["job_id", "submission_time", "starting_time", "finish_time"]
+    # Explicit copy: the assignment below never writes into the caller's
+    # dataframe, and pandas has no reason to warn about a chained assignment.
+    select = df.loc[:, desired_cols].copy()
+
+    # Take the first ':'-separated field with .str[0]; expand=True followed by
+    # [0] finds no column 0 (KeyError) when df has no rows.
+    select["job_id"] = select["job_id"].astype(str).str.split(":", n=1).str[0]
+    return select
+
+def open_and_compare(file1, file2):
+    """Read two batsim _jobs.csv files (paths or file-like objects) and return
+    them as a (DataFrame, DataFrame) tuple; job_ids are not cross-checked yet."""
+    out1 = pd.read_csv(file1)
+    out2 = pd.read_csv(file2)
+    return out1, out2
+