diff --git a/README.md b/README.md index 62e6b9b6e5a1a535eb065dcb8baff801c495503d..c66b0b4515c5255f4af20f9073163940bc5639ae 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # swf2sessions -Python script to read a workload trace in the [Standard Workload Format](https://www.cs.huji.ac.il/labs/parallel/workload/swf.html) (SWF), decompose it into user sessions and store the results in the Session Annotated Batsim JSON format (SABjson). +Python script to read a workload trace in the [Standard Workload Format](https://www.cs.huji.ac.il/labs/parallel/workload/swf.html) (SWF), decompose it into user sessions, analyse the dependencies between sessions and store the results in the Session Annotated Batsim JSON format (SABjson). ## What is a session? -Analysing the workload trace of a parallel infrastructure to identify user sessions was started by Zackay and Feitelson ([Zackay and Feitelson 2013](https://www.cs.huji.ac.il/w~feit/parsched/jsspp12/p12-zakay.pdf)). +Analysing the workload trace of a parallel infrastructure to identify user sessions and their dependencies was started by Zakay and Feitelson ([Zakay and Feitelson 2013](https://www.cs.huji.ac.il/~feit/parsched/jsspp12/p12-zakay.pdf)). The idea behind it is to keep the *logic* of user submissions rather than the exact submission times. 
For example, in the image below, the workload has been split into 4 sessions following the "Arrival" delimitation approach:  @@ -23,7 +23,7 @@ Requirements: To run the session decomposition on the workload `workloads/example.swf` illustrated above, with "Arrival" delimitation approach and a threshold of 60 minutes: ```terminal -python3 swf2sessions.py -at 60 workloads/example.swf out/ +python3 swf2sessions.py -a 60 workloads/example.swf out/ ``` For more documentation, see: `python3 swf2sessions.py -h` diff --git a/swf2userSessions.py b/swf2userSessions.py index 83af1f1365670186306d730224507698ec1d848a..897fc0cced1cd1d6ec6a47d2f3ff28f837d41ad3 100755 --- a/swf2userSessions.py +++ b/swf2userSessions.py @@ -74,35 +74,18 @@ if __name__== "__main__": help='The folder that will store the output files') group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('-a', - '--arrival', - help='Arrival delimitation approach', - action="store_true") - group.add_argument('-l', - '--last', - help='Last delimitation approach', - action="store_true") - group.add_argument('-m', - '--max', - help='Max delimitation approach', - action="store_true") - parser.add_argument( - '-t', - '--threshold', - type=int, - help= - 'Unit: minutes. The threshold (on think-time or inter-arrival time, depending on the delimiation approach) above which a job will be considered to be in a new session.' - ) - - parser.add_argument( - '--no_dynamic_reduction', - action="store_true", + group.add_argument('-a', '--arrival', metavar="THRESHOLD", type=float, + help="'Arrival' delimitation approach. 
A job starts a new session if the inter-arrival time with the last job is above the threshold (in minutes)") + group.add_argument("-l", "--last", metavar="THRESHOLD", type=float, + help="'Last' delimitation approach: a job starts a new session if the think time after the last job is above the threshold (in minutes)") + group.add_argument("-m", "--max", metavar="THRESHOLD", type=float, + help="'Max' delimitation approach: a job starts a new session if the think time after the previous job with the highest finish time is above the threshold (in minutes)") + + parser.add_argument('--no_dynamic_reduction', action="store_true", help= 'Unless this option is specified, during the construction of the graph the algorithm dynamically avoids to add an edge between two nodes if a path already exists.' ) - parser.add_argument( - '--graph', - action="store_true", + parser.add_argument('--graph', action="store_true", help= "Build a graphical representation of each session graph and save them in a subfolder as gml files" ) @@ -111,17 +94,25 @@ if __name__== "__main__": args = parser.parse_args() - if args.last: + if args.last is not None: delim = 'last' - elif args.max: + threshold = args.last + elif args.max is not None: delim = 'max' - else: + threshold = args.max + elif args.arrival is not None: delim = 'arrival' + threshold = args.arrival + else: # should never happen + raise argparse.ArgumentError("You should specify a delimitation approach") + + if threshold < 0: + raise argparse.ArgumentTypeError("The threshold must be a positive value.") swf2sessions(input_swf=args.input_swf, output_dir=args.output_dir, delim_approach=delim, - delim_threshold=args.threshold, + delim_threshold=threshold, dynamic_reduction=not (args.no_dynamic_reduction), build_graph_rep=args.graph, quiet=args.quiet) \ No newline at end of file diff --git a/test/conftest.py b/test/conftest.py index bbc43a6e4e10d163ca88f5adb617efcaf890991d..065c002d35bc0a57300818f8f5ca76b4d008664f 100644 --- 
a/test/conftest.py +++ b/test/conftest.py @@ -29,11 +29,11 @@ def run_script(delim, if not graph: cp = subprocess.run(['python3', 'swf2userSessions.py', - '-at', str(threshold), '-q', input_swf, out_dir], + '-a', str(threshold), '-q', input_swf, out_dir], check=True) else: cp = subprocess.run(['python3', 'swf2userSessions.py', - '-at', str(threshold), '--graph', '-q', input_swf, out_dir], + '-a', str(threshold), '--graph', '-q', input_swf, out_dir], check=True) return out_dir, cp \ No newline at end of file