# %% [markdown]
# # Metacentrum
#
# ## Initializing

# %%
# All imports live in this first code cell so a fresh kernel can "Run All".
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# For easily changing the workload
EXPE_DIR = "out/expe_replay_MC"
PF_folder = "platform/average_metacentrum"
WL_folder = "workload/MC"

# Root of the local `middleware_generators` checkout that holds the SWF logs.
# The default is the original hard-coded location; set the
# MIDDLEWARE_GENERATORS_DIR environment variable to run on another machine.
WL_generators_dir = os.environ.get(
    "MIDDLEWARE_GENERATORS_DIR", "/home/mael/git/middleware_generators"
)

WL_full_swf_path = os.path.join(
    WL_generators_dir, "workload/input/METACENTRUM-2013-3.swf"
)
begin_data_in_swf = 109  # line number (counting from 1)

WL_filtered2013_swf_path = os.path.join(
    WL_generators_dir, "workload/output/METACENTRUM-2013_filtered.swf"
)
WL_filtered2014_swf_path = os.path.join(
    WL_generators_dir, "workload/output/METACENTRUM-2014_filtered.swf"
)

# Original log params (kept commented out, as in the original cell; they refer
# to the SDSC-SP2 log and are not used below)
# WL_URL = "http://www.cs.huji.ac.il/labs/parallel/workload/l_sdsc_sp2/SDSC-SP2-1998-4.swf.gz"
# WL_start_time = '1998-04-24 18:11:04'
# timezone="US/Pacific"

# %%
# Column names applied to the SWF files, in file order (the files are read
# without a header row).
# NOTE(review): "QUEUD_ID" looks like a typo for "QUEUE_ID"; it is kept as-is
# because other cells may select the column by this exact name.
header = [
    "JOB_ID", "SUBMIT_TIME", "WAIT_TIME", "RUN_TIME", "ALLOCATED_PROCESSOR_COUNT",
    "AVERAGE_CPU_TIME_USED", "USED_MEMORY", "REQUESTED_NUMBER_OF_PROCESSORS",
    "REQUESTED_TIME", "REQUESTED_MEMORY", "STATUS", "USER_ID", "GROUP_ID",
    "APPLICATION_ID", "QUEUD_ID", "PARTITION_ID", "PRECEDING_JOB_ID",
    "THINK_TIME_FROM_PRECEDING_JOB",
]


def read_swf(swf_path, **read_csv_kwargs):
    """Load a whitespace-separated SWF file into a DataFrame.

    Parameters
    ----------
    swf_path : str
        Path of the file to read.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``header=...`` to skip a comment preamble).

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, with columns named after ``header``.
    """
    # `sep=r"\s+"` is the documented replacement for the deprecated
    # `delim_whitespace=True`.
    return pd.read_csv(swf_path, sep=r"\s+", names=header, **read_csv_kwargs)


# The full (unfiltered) log is not needed below; to load it, skip the preamble:
# WL_full = read_swf(WL_full_swf_path, header=begin_data_in_swf-2)
WL_filtered2013 = read_swf(WL_filtered2013_swf_path)
WL_filtered2014 = read_swf(WL_filtered2014_swf_path)

# %%
# The original cell used `WL_filtered`, a name that no cell defines, so it
# failed with NameError on a fresh kernel. Its recorded output (765986 rows)
# matches the 2013 job count, so the 2013 frame is used explicitly.
WL_filtered2013["finish_time"] = (
    WL_filtered2013.SUBMIT_TIME + WL_filtered2013.WAIT_TIME + WL_filtered2013.RUN_TIME
)
WL_filtered2013["turnaround_time"] = WL_filtered2013.WAIT_TIME + WL_filtered2013.RUN_TIME

print(WL_filtered2013.STATUS.value_counts())
<td>801.890002</td>\n", + " <td>4879140.5</td>\n", + " <td>23984.826289</td>\n", + " <td>18163288</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2014_filtered</th>\n", + " <th>0</th>\n", + " <td>39127717</td>\n", + " <td>31449352</td>\n", + " <td>1212560</td>\n", + " <td>19429.814337</td>\n", + " <td>229.0</td>\n", + " <td>7829272</td>\n", + " <td>1521.444489</td>\n", + " <td>2636706.0</td>\n", + " <td>22160.752967</td>\n", + " <td>7856812</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " makespan length nb_jobs mean_waiting_time \\\n", + "2013_filtered 0 39968099 31535248 765986 20205.083068 \n", + "2014_filtered 0 39127717 31449352 1212560 19429.814337 \n", + "\n", + " median_waiting_time max_waiting_time mean_slowdown \\\n", + "2013_filtered 0 1259.0 18163266 801.890002 \n", + "2014_filtered 0 229.0 7829272 1521.444489 \n", + "\n", + " max_slowdown mean_turnaround_time max_turnaround_time \n", + "2013_filtered 0 4879140.5 23984.826289 18163288 \n", + "2014_filtered 0 2636706.0 22160.752967 7856812 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Simulation results (with readable durations):\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>makespan</th>\n", + " <th>length</th>\n", + " <th>nb_jobs</th>\n", + " <th>mean_waiting_time</th>\n", + " <th>median_waiting_time</th>\n", + " <th>max_waiting_time</th>\n", + " <th>mean_slowdown</th>\n", + " <th>max_slowdown</th>\n", + " <th>mean_turnaround_time</th>\n", + " 
# %%
# Display order of the summary columns (same order for both tables below).
METRIC_COLUMNS = [
    "makespan", "length", "nb_jobs", "mean_waiting_time", "median_waiting_time",
    "max_waiting_time", "mean_slowdown", "max_slowdown", "mean_turnaround_time",
    "max_turnaround_time",
]

# Subset of METRIC_COLUMNS that is rendered as durations in the second table.
DURATION_COLUMNS = [
    "makespan", "length", "mean_waiting_time", "max_waiting_time",
    "median_waiting_time", "mean_turnaround_time", "max_turnaround_time",
]


def compute_swf_metrics(WL_swf):
    """Summarise one SWF workload as a one-row DataFrame of metrics.

    Side effect: adds (or overwrites) the columns ``finish_time``,
    ``turnaround_time`` and ``slowdown`` on ``WL_swf`` itself, exactly as the
    original inline loop did, so that later cells can keep using them.

    Parameters
    ----------
    WL_swf : pandas.DataFrame
        Workload with at least the columns ``SUBMIT_TIME``, ``WAIT_TIME`` and
        ``RUN_TIME``.

    Returns
    -------
    pandas.DataFrame
        A single row (index ``0``) holding job count, waiting-time, makespan,
        length, slowdown and turnaround-time statistics.
    """
    WL_swf["finish_time"] = WL_swf.SUBMIT_TIME + WL_swf.WAIT_TIME + WL_swf.RUN_TIME
    WL_swf["turnaround_time"] = WL_swf.WAIT_TIME + WL_swf.RUN_TIME
    # A RUN_TIME of 0 is replaced by 1 to avoid a division by zero.
    # NOTE(review): other non-positive RUN_TIME values are not handled here;
    # this presumably relies on the input files being filtered already.
    WL_swf["slowdown"] = WL_swf.turnaround_time / WL_swf.RUN_TIME.replace(0, 1)

    first_submit = WL_swf.SUBMIT_TIME.min()
    return pd.DataFrame([{
        "nb_jobs": WL_swf.shape[0],
        # "nb_jobs_success": WL_swf.STATUS.value_counts()[1],
        "mean_waiting_time": WL_swf.WAIT_TIME.mean(),
        "median_waiting_time": WL_swf.WAIT_TIME.median(),
        "max_waiting_time": WL_swf.WAIT_TIME.max(),
        "makespan": WL_swf.finish_time.max() - first_submit,
        "length": WL_swf.SUBMIT_TIME.max() - first_submit,
        "mean_slowdown": WL_swf.slowdown.mean(),
        "max_slowdown": WL_swf.slowdown.max(),
        "mean_turnaround_time": WL_swf.turnaround_time.mean(),
        "max_turnaround_time": WL_swf.turnaround_time.max(),
    }])


# %%
# `header`, `WL_filtered2013` and `WL_filtered2014` come from the loading cell
# above; the original cell re-declared the header and re-read both files here.
# The result used to be stored in a variable named `all`, which shadowed the
# builtin `all()` for the rest of the kernel session.
swf_metrics = pd.concat(
    [compute_swf_metrics(WL_swf) for WL_swf in (WL_filtered2013, WL_filtered2014)],
    keys=["2013_filtered", "2014_filtered"],
)

# NOTE(review): the values below are computed from the SWF logs, not from a
# simulation run; the printed labels are kept unchanged, confirm the wording.
print("Simulation results:")
display(swf_metrics[METRIC_COLUMNS])

print("Simulation results (with readable durations):")
# Interpret the duration columns as seconds and drop the sub-second part, as
# the former `astype('timedelta64[s]')` cast did. A new frame is built so the
# numeric `swf_metrics` stays available and the cell can be re-run safely.
swf_metrics_readable = swf_metrics.assign(**{
    column: pd.to_timedelta(swf_metrics[column], unit="s").dt.floor("s")
    for column in DURATION_COLUMNS
})
display(swf_metrics_readable[METRIC_COLUMNS])

# %%
SECONDS_PER_HOUR = 3600

fig, ax = plt.subplots()
WL_filtered2013.WAIT_TIME.plot(kind="box", showmeans=True, showfliers=False, ax=ax)

# Re-label the y axis in whole hours: one tick per hour across the current
# limits (both limits are truncated toward zero by int()).
y_l, y_r = ax.get_ylim()
y_l, y_r = int(y_l / SECONDS_PER_HOUR), int(y_r / SECONDS_PER_HOUR)
hour_ticks = np.arange(y_l, y_r + 1, step=1)
ax.set_yticks(hour_ticks * SECONDS_PER_HOUR, labels=hour_ticks)
ax.set_ylabel("Waiting time (hours)")
# The trailing semicolon keeps the cell from echoing a matplotlib object repr.
ax.set_title("METACENTRUM 2013 (filtered): job waiting time");

# %% [markdown]
# Simulation launched in grid5000.
] }, @@ -314,8 +663,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" }, "orig_nbformat": 4 },