-
Millian Poquet authoredMillian Poquet authored
m100_agg_power_predictions.py 1.48 KiB
#!/usr/bin/env python3
import argparse
import glob
import os
import re
import pandas as pd
# Pattern that every per-user prediction file path must match; group 1
# captures the digits of the user id embedded in the file name.
# Written as a raw string: '\d' and '\.' are invalid escape sequences in a
# plain literal and trigger a SyntaxWarning on recent Python versions.
FILENAME_PARSE_REGEX = r'''.*/filter123_user_(\d+)_total_power_mean_pred\.csv$'''
def read_aggregate_one_dir(dir, estimated_metrics):
    """Concatenate the per-user prediction CSV files of one directory.

    Every ``*.csv`` file directly inside ``dir`` must match
    ``FILENAME_PARSE_REGEX``; the user id is taken from the file name.

    Args:
        dir: Directory holding the per-user CSV files. The name shadows the
            builtin but is kept so existing keyword callers keep working.
        estimated_metrics: Suffix selecting the input column
            ``hist_pred_total_power_<estimated_metrics>`` and naming the
            output column ``<estimated_metrics>_power_estimation``.

    Returns:
        A DataFrame with the columns ``job_id``, ``user_id`` (the string
        captured from the file name) and
        ``<estimated_metrics>_power_estimation``, or ``None`` when the
        directory contains no CSV file.

    Raises:
        RuntimeError: If a CSV file name does not match
            ``FILENAME_PARSE_REGEX``.
    """
    filename_pattern = re.compile(FILENAME_PARSE_REGEX)
    # Column names do not depend on the file: compute them once.
    source_column = f'hist_pred_total_power_{estimated_metrics}'
    estimation_column = f'{estimated_metrics}_power_estimation'

    per_user_frames = []
    # glob order depends on the file system; sort for a reproducible row order.
    for filename in sorted(glob.glob(f'{dir}/*.csv')):
        m = filename_pattern.match(filename)
        if m is None:
            raise RuntimeError(f'Unexpected file encountered: {filename}')

        df = pd.read_csv(filename, low_memory=False)
        df['user_id'] = m.group(1)
        df[estimation_column] = df[source_column]
        # Keep only the narrow slice so the wide input frame can be freed
        # as soon as the next file is read.
        per_user_frames.append(df[['job_id', 'user_id', estimation_column]])

    if not per_user_frames:
        # Nothing to aggregate: same result as before for an empty directory.
        return None

    # A single concatenation avoids re-copying the accumulated rows for
    # every additional file.
    return pd.concat(per_user_frames)
def read_aggregate_root_dir(root_dir):
    """Build one table holding both the mean and the max power estimations.

    Args:
        root_dir: Directory containing the two sub-directories
            ``total_power_mean_predictions_users_allmethods_mean`` and
            ``total_power_mean_predictions_users_allmethods_max``.

    Returns:
        The join on ``job_id`` and ``user_id`` of the tables read from both
        sub-directories (pandas' default inner join).

    Raises:
        RuntimeError: If one of the sub-directories yields no data.
    """
    mean_dir = f'{root_dir}/total_power_mean_predictions_users_allmethods_mean'
    max_dir = f'{root_dir}/total_power_mean_predictions_users_allmethods_max'

    full_df_mean = read_aggregate_one_dir(mean_dir, 'mean')
    full_df_max = read_aggregate_one_dir(max_dir, 'max')

    # The reader returns None for a directory without CSV files; fail with a
    # clear message instead of an AttributeError on None.
    for metric_dir, metric_df in ((mean_dir, full_df_mean), (max_dir, full_df_max)):
        if metric_df is None:
            raise RuntimeError(f'No prediction CSV file found in: {metric_dir}')

    # Explicit keys: these are the only two columns both tables share.
    return full_df_mean.merge(full_df_max, on=['job_id', 'user_id'])
def agg_all_files():
    """Command-line entry point: aggregate the predictions into one CSV file.

    Reads two positional arguments from the command line, ``input_root_dir``
    and ``output_file``, aggregates the predictions found under the input
    directory, and writes them sorted by ``job_id`` to the output file
    without the index column.
    """
    cli = argparse.ArgumentParser()
    for positional in ("input_root_dir", "output_file"):
        cli.add_argument(positional)
    options = cli.parse_args()

    predictions = read_aggregate_root_dir(options.input_root_dir)
    ordered = predictions.sort_values(by=['job_id'])
    ordered.to_csv(options.output_file, index=False)