Skip to content
Snippets Groups Projects
Commit 1fd00500 authored by Maël Madon's avatar Maël Madon
Browse files

cleaned and documented swf_filter

parent d3df2efe
Branches
No related tags found
No related merge requests found
......@@ -12,17 +12,10 @@ import re
from swf import SwfField
def generate_workload(input_swf, output_swf=None,
partitions_to_select=None,
job_walltime_factor=2,
given_walltime_only=False,
job_grain=1,
platform_size=None,
indent=None,
translate_submit_times=False,
keep_only=None,
quiet=False,
job_size_function_string='1*nb_res'):
def filter_workload(input_swf, output_swf=None,
partitions_to_select=None,
keep_only=None,
quiet=False):
"""Makes a selection from a SWF trace, optionally outputting it as SWF."""
element = '([-+]?\d+(?:\.\d+)?)'
r = re.compile('\s*' + (element + '\s+') * 17 + element + '\s*')
......@@ -45,23 +38,14 @@ def generate_workload(input_swf, output_swf=None,
if res:
# Parsing...
job_id = (int(float(res.group(SwfField.JOB_ID.value))))
nb_res = int(
float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value)))
run_time = float(res.group(SwfField.RUN_TIME.value))
submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value)))
walltime = max(job_walltime_factor * run_time,
float(res.group(SwfField.REQUESTED_TIME.value)))
walltime = float(res.group(SwfField.REQUESTED_TIME.value))
user_id = str(res.group(SwfField.USER_ID.value))
partition_id = int(res.group(SwfField.PARTITION_ID.value))
# nb_res may be changed by calling a user-given function
nb_res = eval(job_size_function_string)
if given_walltime_only:
walltime = float(res.group(SwfField.REQUESTED_TIME.value))
# Select jobs to keep
is_valid_job = (nb_res > 0 and walltime >
run_time and run_time > 0 and submit_time >= 0)
......@@ -70,14 +54,13 @@ def generate_workload(input_swf, output_swf=None,
use_job = select_partition and (
(keep_only is None) or eval(keep_only))
# Increment counters
if not is_valid_job:
not_valid += 1
if not use_job:
elif not use_job:
not_selected["nb"] += 1
not_selected["coreh"] += run_time * nb_res
else:
# Increment counters
selected["nb"] += 1
selected["coreh"] += run_time * nb_res
if user_id not in users:
......@@ -87,25 +70,22 @@ def generate_workload(input_swf, output_swf=None,
if output_swf is not None:
output_swf.write(line)
else:
not_line_match_format += 1
if not quiet:
print('\n-------------------\nEnd parsing.')
print('Total {} jobs and {} users have been created.'.format(
selected["nb"], len(users)))
print("\n-------------------\nEnd parsing.")
print(
'Total number of core-hours: {:.0f}'.format(selected["coreh"] / 3600))
print('{} valid jobs were not selected (keep_only) for {:.0f} core-hour'.format(
not_selected["nb"], not_selected["coreh"] / 3600))
f"Total {selected['nb']} jobs and {len(users)} users have been created.")
print(f"Total number of core-hours: {selected['coreh'] / 3600:.0f}")
print(f"{not_selected['nb']} valid jobs were not selected (keep_only) "
f"for {not_selected['coreh'] / 3600:.0f} core-hour")
print("Jobs not selected: {:.1f}% in number, {:.1f}% in core-hour"
.format(not_selected["nb"] / (not_selected["nb"]+selected["nb"]) * 100,
not_selected["coreh"] / (selected["coreh"]+not_selected["coreh"]) * 100))
print('{} out of {} lines in the file did not match the swf format'.format(
not_line_match_format, i))
print('{} jobs were not valid'.format(not_valid))
.format(not_selected["nb"] / (not_selected["nb"]+selected["nb"]) * 100,
not_selected["coreh"] / (selected["coreh"]+not_selected["coreh"]) * 100))
print(f"{not_line_match_format} out of {i} lines in the file did not "
"match the swf format")
print(f"{not_valid} jobs were not valid")
def main():
......@@ -115,7 +95,9 @@ def main():
Parses the input arguments then calls filter_workload.
"""
parser = argparse.ArgumentParser(
description='Reads a SWF (Standard Workload Format) file and outputs some stats')
description="Filter jobs from the SWF input according to some criteria "
"and display the proportion of jobs kept, optionnally outputing the "
"selection in a new SWF file.")
parser.add_argument('input_swf', type=argparse.FileType('r'),
help='The input SWF file')
parser.add_argument('-o', '--output_swf',
......@@ -125,27 +107,6 @@ def main():
type=int, nargs='+', default=None,
help='List of partitions to only consider in the input trace. The jobs running in the other partitions will be discarded.')
parser.add_argument('-jsf', '--job-size-function',
type=str,
default='1*nb_res',
help='The function to apply on the jobs size. '
'The identity is used by default.')
parser.add_argument('-jwf', '--job_walltime_factor',
type=float, default=2,
help='Jobs walltimes are computed by the formula max(givenWalltime, jobWalltimeFactor*givenRuntime)')
parser.add_argument('-gwo', '--given_walltime_only',
action="store_true",
help='If set, only the given walltime in the trace will be used')
parser.add_argument('-jg', '--job_grain',
type=int, default=1,
help='Selects the level of detail we want for jobs. This parameter is used to group jobs that have close running time')
parser.add_argument('-pf', '--platform_size', type=int, default=None,
help='If set, the number of machines to put in the output JSON files is set by this parameter instead of taking the maximum job size')
parser.add_argument('-i', '--indent', type=int, default=None,
help='If set to a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0, or negative, will only insert newlines. The default value (None) selects the most compact representation.')
parser.add_argument('-t', '--translate_submit_times',
action="store_true",
help="If set, the jobs' submit times will be translated towards 0")
parser.add_argument('--keep_only',
type=str,
default=None,
......@@ -155,18 +116,11 @@ def main():
args = parser.parse_args()
generate_workload(input_swf=args.input_swf,
output_swf=args.output_swf,
partitions_to_select=args.partitions_to_select,
job_walltime_factor=args.job_walltime_factor,
given_walltime_only=args.given_walltime_only,
job_grain=args.job_grain,
platform_size=args.platform_size,
indent=args.indent,
translate_submit_times=args.translate_submit_times,
keep_only=args.keep_only,
quiet=args.quiet,
job_size_function_string=args.job_size_function)
filter_workload(input_swf=args.input_swf,
output_swf=args.output_swf,
partitions_to_select=args.partitions_to_select,
keep_only=args.keep_only,
quiet=args.quiet)
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment