Skip to content
Snippets Groups Projects
Commit 0787dddb authored by huongdm1896's avatar huongdm1896
Browse files

update kill process script -> use arg instead of hard code

parent a0d10e3f
Branches feature/test-coverage
Tags
No related merge requests found
...@@ -210,6 +210,7 @@ oarsub -I -l host=4,walltime=2 ...@@ -210,6 +210,7 @@ oarsub -I -l host=4,walltime=2
Reserve 4 hosts (GPU) (1 server + 3 clients) for 2 hours: Reserve 4 hosts (GPU) (1 server + 3 clients) for 2 hours:
```bash ```bash
oarsub -I -t exotic -p "gpu_count>0" -l {"cluster='drac'"}/host=4 # grenoble oarsub -I -t exotic -p "gpu_count>0" -l {"cluster='drac'"}/host=4 # grenoble
oarsub -I -p "gpu_count>0" -l {"cluster='chifflot'"}/host=4 # lille
``` ```
**Remark**: For now, only two clusters — `chifflot` in Lille and `drac` in Grenoble — are available for testing on more than 3 GPU nodes; the maximum is 8 nodes (`chifflot`) or 12 nodes (`drac`).
......
# python3 kill.py --config ./config_instances_lille.json --user mdo --serverkey "python3 /home/mdo/eflwr/Flower_v1/server_1.py" --clientkey "/home/mdo/eflwr/Flower_v1/client_1.py"
#!/usr/bin/env python3
import os import os
import subprocess import subprocess
import json import json
import argparse
from pathlib import Path from pathlib import Path
def parse_args(argv=None):
    """Parse the command-line options of the kill script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            ``argparse`` reads ``sys.argv[1:]``, which is what a plain
            ``parse_args()`` call does.

    Returns:
        argparse.Namespace with the attributes ``config``, ``user``,
        ``serverkey`` and ``clientkey`` (all required strings).

    Raises:
        SystemExit: raised by ``argparse`` when a required option is missing
            or an unknown option is given.
    """
    parser = argparse.ArgumentParser(description="Kill background jobs by keyword and user")
    parser.add_argument("--config", required=True, help="Path to the config JSON file")
    parser.add_argument("--user", required=True, help="Username to filter processes")
    parser.add_argument("--serverkey", required=True, help="Keyword to identify server process")
    parser.add_argument("--clientkey", required=True, help="Keyword to identify client processes")
    return parser.parse_args(argv)
def check_and_kill_process(host, keyword, user):
    """
    Check for processes matching the keyword on the host and kill them (only for the given user).

    Runs ``pgrep -u <user> -fa <keyword>`` on ``host`` through ``oarsh`` and
    sends ``kill -9`` to every PID that is reported.

    Args:
        host: Node to inspect; passed to ``oarsh`` as the target host.
        keyword: Pattern matched against full command lines (``pgrep -f``).
        user: Only processes owned by this user are considered (``pgrep -u``).

    Returns:
        None. Progress and failures are printed; exceptions are caught here so
        that a failure on one host does not abort the caller's loop.
    """
    import shlex  # local import: only this function needs it

    try:
        print(f"\n========== Checking processes on {host} for user '{user}' ==========")
        current_pid = str(os.getpid())

        # Build the remote command as ONE string with every value quoted, and
        # run oarsh without a local shell. This keeps a keyword containing
        # spaces or quotes as a single pgrep pattern.
        # NOTE(review): assumes oarsh hands its command to a remote shell the
        # way ssh does - confirm against the OAR installation in use.
        pgrep_command = f"pgrep -u {shlex.quote(user)} -fa {shlex.quote(keyword)}"
        result = subprocess.run(
            ["oarsh", host, pgrep_command], text=True, capture_output=True
        )

        # pgrep exits 0 on a match and 1 on no match; anything else is a real
        # failure (bad pattern, unreachable host, ...) and must not be
        # reported as "nothing to kill".
        if result.returncode not in (0, 1):
            print(
                f"Error on host {host}: process lookup exited with "
                f"{result.returncode}: {result.stderr.strip()}"
            )
            return

        listing = result.stdout.strip()
        if result.returncode != 0 or not listing:
            print("No matching processes found.")
            return

        for line in listing.splitlines():
            # Each pgrep -a line is "<pid> <command line>"; partition never
            # raises, even when the command line part is missing.
            pid, _, process_name = line.strip().partition(" ")
            if not pid.isdigit():
                # Not a pgrep result line (e.g. a login banner); ignore it.
                continue

            # This script's own command line contains the keyword (it is
            # passed as an option), so pgrep -f can list it when `host` is the
            # machine running the script.
            if pid == current_pid:
                print(f"Skipping current script process (PID={pid})")
                continue

            print(f"Found process: PID={pid}, Process={process_name}")

            kill_result = subprocess.run(
                ["oarsh", host, f"kill -9 {pid}"], text=True, capture_output=True
            )
            if kill_result.returncode == 0:
                print(f"Killed process {pid} on {host}")
            else:
                print(
                    f"Failed to kill process {pid} on {host}: "
                    f"{kill_result.stderr.strip()}"
                )
    except Exception as e:
        # Per-host boundary: report and let the caller continue with the
        # remaining hosts.
        print(f"Error on host {host}: {e}")
def main():
    """Kill the server and client processes described by the config file.

    Reads the JSON file given by ``--config``, takes the entry under
    ``["instances"]["1"]``, and calls ``check_and_kill_process`` for the
    server IP (with ``--serverkey``) and for every client IP (with
    ``--clientkey``), restricted to the user given by ``--user``.

    Only instance ``"1"`` of the config is processed.
    """
    args = parse_args()

    # Load config
    config_path = Path(args.config).resolve()
    with open(config_path, "r", encoding="utf-8") as file:
        config = json.load(file)["instances"]["1"]

    # Get server IP
    server_ip = config["server"]["ip"]

    # Get client list
    client_details = [client["ip"] for client in config["clients"]]

    # Kill server processes
    check_and_kill_process(server_ip, args.serverkey, args.user)

    # Kill client processes (entries with an empty IP are skipped)
    for client_ip in client_details:
        if client_ip:
            check_and_kill_process(client_ip, args.clientkey, args.user)

    print("\n========== Process Management Completed ==========")


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment