From 6cb6c182d0fdfa62c5f92f79297fe421f9f791c4 Mon Sep 17 00:00:00 2001 From: AxelCarayon <axel.carayon@gmail.com> Date: Tue, 8 Mar 2022 11:05:02 +0100 Subject: [PATCH] refactoring --- dockerModule.py | 23 ++++++ loadExperiment.py | 94 ++++++----------------- registerExperiment.py | 171 ++++++++++++++++-------------------------- utils.py | 11 +++ 4 files changed, 121 insertions(+), 178 deletions(-) create mode 100644 dockerModule.py create mode 100644 utils.py diff --git a/dockerModule.py b/dockerModule.py new file mode 100644 index 0000000..6e01962 --- /dev/null +++ b/dockerModule.py @@ -0,0 +1,23 @@ +import subprocess + +def getWorkir(dockerFile) -> str : + workdir = "/" + with open(dockerFile,"r") as file: + for line in file.read().splitlines(): + if line.startswith("WORKDIR"): + workdir = line.split(" ")[1] + return workdir + +def buildDockerImage(imageName) -> None: + print("Building the docker image ...") + try : + subprocess.run(f"docker build -t {imageName.lower()}experiment ./",shell=True).check_returncode() + except : + subprocess.run(f"sudo docker build -t {imageName.lower()}experiment ./",shell=True).check_returncode() + +def runDockerImage(imageName,workDirectory) -> None: + print("binding docker image to the current directory and running it...") + try: + subprocess.run(f"docker run -it --mount type=bind,source=\"$PWD\",target={workDirectory} {imageName.lower()}experiment",shell=True).check_returncode() + except : + subprocess.run(f"sudo docker run -it --mount type=bind,source=\"$PWD\",target={workDirectory} {imageName.lower()}experiment",shell=True).check_returncode() \ No newline at end of file diff --git a/loadExperiment.py b/loadExperiment.py index fa03e69..55d53e7 100644 --- a/loadExperiment.py +++ b/loadExperiment.py @@ -4,57 +4,30 @@ import subprocess import yaml import hashlib import warnings +from utils import typeEnterToContinue +from dockerModule import getWorkir, buildDockerImage, runDockerImage -INPUT_FOLDER = "inputs" -OUTPUT_FOLDER = "outputs" - -repo = None -folder = None - -commandsFile = None -instructionFile = None - -inputFiles = [] -outputFiles = [] - -dockerfileIsPresent = False -beforeHash = None +params = None def init(repository,branch) -> None : - global repo, folder folder = repository.split('/')[-1].split('.')[0] - if os.path.exists(folder) : - print(f"Folder ./{folder} already exists, do you want to delete it ? (y/n)") - answer = input() - if answer == 'y' : - os.system(f"rm -rf ./{folder}") - else : - print("Aborting") - exit(0) - git.Git("./").clone(repository) - repo = git.Repo(folder) + if not os.path.exists(folder) : + git.Git("./").clone(repository) try : - repo.git.checkout(branch) + git.Repo(folder).git.checkout(branch) except git.exc.GitCommandError : raise Exception(f"Branch {branch} not found in the repository") os.chdir(folder) def getParameters() -> None : - global commandsFile, inputFiles, outputFiles, beforeHash, instructionFile, dockerfileIsPresent + global params if not (os.path.exists('experimentResume.yaml')): raise Exception("No exeperimentResume.yaml file found, the branch is not an exeperiment") with open('experimentResume.yaml', 'r') as stream: - parameters = yaml.safe_load(stream) - commandsFile = parameters.get('commands') - outputFiles = parameters.get('outputs') - inputFiles = parameters.get('inputs') - beforeHash = parameters.get('checksums') - instructionFile = parameters.get('instructions') - dockerfileIsPresent = parameters.get('dockerfile') - + params = yaml.safe_load(stream) def runExperiment() -> None : - file = open(commandsFile, "r") + file = open(params.get('commands'), "r") for line in file.read().splitlines(): print(f"running {line} ...") process = subprocess.run(line,shell=True) @@ -62,14 +35,11 @@ def runExperiment() -> None : print("done") def checkForInstructions() -> None : - if (instructionFile != None) : - print("You can check the instructions for the experiment in the file : " + instructionFile) + if (params.get('instruction') != None) : + print("You can check the instructions for the experiment in the file : " + params.get('instruction')) else : warnings.warn("No instructions for the experiment found in the repository") - print("Run the exepriment and then press enter when it's done") - done = "nope" - while (done != "") : - done = input() + typeEnterToContinue("Run the exepriment and then press enter when it's done") def genChecksum(file) -> str : hash_md5 = hashlib.md5() @@ -80,57 +50,35 @@ def genChecksum(file) -> str : def genChecksums() -> list[dict]: checksums = [] - for file in os.listdir(OUTPUT_FOLDER) : + outputFolder = params.get('outputFolder') + for file in os.listdir(outputFolder) : if not file.endswith(".gitkeep"): - checksums.append({f"{OUTPUT_FOLDER}/{file}" : genChecksum(f'{OUTPUT_FOLDER}/{file}')}) + checksums.append({f"{outputFolder}/{file}" : genChecksum(f'{outputFolder}/{file}')}) return checksums def compareChecksums() -> bool: changes = False - for (dict1, dict2) in zip(beforeHash, genChecksums()): + for (dict1, dict2) in zip(params.get('checksums'), genChecksums()): for (key, value) in dict1.items(): if dict2.get(key) != value : warnings.warn(f"{key} has changed") changes = True return changes - -def buildDockerImage() -> None: - print("Building the docker image ...") - try : - subprocess.run(f"docker build -t experimentreproduction ./",shell=True).check_returncode() - except : - subprocess.run(f"sudo docker build -t experimentreproduction ./",shell=True).check_returncode() - -def getWorkir() -> str : - workdir = "/" - with open("Dockerfile","r") as file: - for line in file.read().splitlines(): - if line.startswith("WORKDIR"): - workdir = line.split(" ")[1] - return workdir - -def runDockerImage() -> None: - print("binding docker image to the current directory and running it...") - try: - subprocess.run(f"docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} experimentreproduction",shell=True).check_returncode() - except : - subprocess.run(f"sudo docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} experimentreproduction",shell=True).check_returncode() - - def run(repository, branch) -> None : print("Initializing the experiment repository ...") init(repository, branch) print("Getting the experiment parameters ...") getParameters() print("Running the experiment ...") - if (dockerfileIsPresent) : + if (params.get('dockerfile')) : print("Dockerimage was found ! Using it to run the experiment...") - buildDockerImage() - runDockerImage() + name = params.get("name") + buildDockerImage(name) + runDockerImage(name,getWorkir("Dockerfile")) else: - if (commandsFile != None) : + if (params.get('commands') != None) : runExperiment() else : checkForInstructions() diff --git a/registerExperiment.py b/registerExperiment.py index e763be8..4755742 100644 --- a/registerExperiment.py +++ b/registerExperiment.py @@ -1,36 +1,37 @@ -from multiprocessing.connection import answer_challenge import os -from unicodedata import name import git import subprocess import yaml import hashlib import warnings +from dockerModule import getWorkir, buildDockerImage, runDockerImage +from utils import yesOrNo EXPERIMENT_RESUME = "experimentResume.yaml" DOCKERFILE = "Dockerfile" path = "./" -repository = None - -inputFolder = None -inputFiles = [] - -paramsFolder = None -paramsFiles = [] - -commandsFile = None -instructionFile = None - -experimentName = None - -outputFolder = None -outputFiles = [] - -currentTag = None -tags = None - +gitContent = { + 'repository' : None, + 'branchName' : None, + 'currentTag' : None, + 'tags' : None +} + +experiment = { + 'name' : None, + 'inputFolder' : None, + 'inputFiles' : [], + 'paramsFolder' : None, + 'paramsFiles' : [], + 'commandsFile' : None, + 'instructionFile' : None, + 'outputFolder' : None, + 'outputFiles' : [], + 'dockerfile' : None, + 'checksums' : [] +} def isGitRepo(path) -> bool: try: @@ -39,58 +40,32 @@ def isGitRepo(path) -> bool: except git.exc.InvalidGitRepositoryError: return False - - def dockerfileIsPresent() -> bool: if fileExists(DOCKERFILE): - answer = input("A dockerfile was found ! It will be used to reproduce the experiment. Is that ok for you ? (y/n)") - if answer == "n": + if yesOrNo("A dockerfile was found ! It will be used to reproduce the experiment. Is that ok for you ?"): + return True + else : raise Exception("""Remove the dockerfile and try again""") - return True else : return False -def buildDockerImage() -> None: - print("Building the docker image ...") - try : - subprocess.run(f"docker build -t {experimentName.lower()}experiment ./",shell=True).check_returncode() - except : - subprocess.run(f"sudo docker build -t {experimentName.lower()}experiment ./",shell=True).check_returncode() - -def getWorkir() -> str : - workdir = "/" - with open(DOCKERFILE,"r") as file: - for line in file.read().splitlines(): - if line.startswith("WORKDIR"): - workdir = line.split(" ")[1] - return workdir - -def runDockerImage() -> None: - print("binding docker image to the current directory and running it...") - try: - subprocess.run(f"docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} {experimentName.lower()}experiment",shell=True).check_returncode() - except : - subprocess.run(f"sudo docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} {experimentName.lower()}experiment",shell=True).check_returncode() - #TODO : vérifier si la reproduction avec un Dockerfile marche dans l'autre sens - def init(pathInput) -> None : - global repository,path,experimentName,tags, currentTag if isGitRepo(pathInput): - path += pathInput + path = pathInput if not (pathInput[len(pathInput)-1] == "/"): path+="/" - repository = git.Repo(path) - experimentName = repository.active_branch.name + gitContent['repository'] = git.Repo(path) + experiment["name"] = gitContent['repository'].active_branch.name os.chdir(path) else : raise Exception(f"{pathInput} is not a git repository") - tags = repository.tags - currentTag = repository.git.describe('--tags') + gitContent['tags'] = gitContent['repository'].tags + gitContent['currentTag'] = gitContent['repository'].git.describe('--tags') if not(currentVersionIsTagged()): raise Exception("Current version is not tagged, you can only reproduce an experiment from a tagged version.") def currentVersionIsTagged() -> bool: - return currentTag in tags + return gitContent['currentTag'] in gitContent['tags'] def fileExists(fileName) -> bool: return os.path.exists(fileName) @@ -99,20 +74,18 @@ def folderExists(folderName) -> bool: return os.path.isdir(folderName) def askForInputFolder() -> None: - global inputFolder answer = input("If you use input data, where are they stored ? Give the path from the root of the repository : ") if answer == "": warnings.warn("No input folder given, no input files will be registered") else: if not folderExists(answer): - raise Exception(f"{path}/{answer} folder does not exist") + raise Exception(f"{answer} folder does not exist") else: if not answer.endswith("/"): answer+="/" - inputFolder = answer + experiment['inputFolder'] = answer def askForOutputFolder() -> None: - global outputFolder answer = input("Where are the outputs generated ? Give the path from the root of the repository : ") if answer == "": warnings.warn("No output folder given, no output files will be registered") @@ -122,29 +95,27 @@ def askForOutputFolder() -> None: else: if not answer.endswith("/"): answer+="/" - outputFolder = answer + experiment['outputFolder'] = answer def askForParamsFolder() -> None: - global paramsFolder answer = input("In which folder do you store your parameters ? Give the path from the root of the repository : ") if answer == "": warnings.warn("No parameters folder given, no parameters will be registered") else: if not folderExists(answer): - raise Exception(f"{path}/{answer} folder does not exist") + raise Exception(f"{answer} folder does not exist") else: if not answer.endswith("/"): answer+="/" - paramsFolder = answer + experiment['paramsFolder'] = answer def askForCommandsFile() -> None: - global commandsFile commandsFile = input("Enter the name of the commands file: ") if not fileExists(commandsFile): raise Exception(f"{commandsFile} file does not exist") + experiment['commandsFile'] = commandsFile def askForInstructionFile() -> None : - global instructionFile print("If you have an instruction file, enter its name, otherwise press enter") instructionFile = input() if instructionFile == "": @@ -152,16 +123,17 @@ def askForInstructionFile() -> None : else: if not fileExists(instructionFile): raise Exception(f"{instructionFile} file does not exist") + experiment['insuctionFile'] = instructionFile def registeringExperimentInputs(inputs) -> None: - with open(commandsFile, "w") as file: + with open(experiment['commandsFile'], "w") as file: for input in inputs: file.write(input+"\n") def runExperiment() -> None: print("Trying to run experiment") - file = open(commandsFile, "r") + file = open(experiment['commandsFile'], "r") for line in file.read().splitlines(): print(f"running {line} ...") process = subprocess.run(line,shell=True) @@ -169,26 +141,29 @@ def runExperiment() -> None: print("done") def scanInputFiles() -> None: - for file in os.listdir(inputFolder): + for file in os.listdir(experiment['inputFolder']): if not file.endswith(".gitkeep"): - inputFiles.append(f"{inputFolder}{file}") + experiment['inputFiles'].append(f"{experiment['inputFolder']}{file}") def scanOutputsGenerated() -> None: - for file in os.listdir(outputFolder): + for file in os.listdir(experiment['outputFolder']): if not file.endswith(".gitkeep"): - outputFiles.append(f"{outputFolder}{file}") + experiment['outputFiles'].append(f"{experiment['outputFolder']}{file}") def scanParameters() -> None: - for file in os.listdir(paramsFolder): + for file in os.listdir(experiment['paramsFolder']): if not file.endswith(".gitkeep"): - paramsFiles.append(f"{paramsFolder}{file}") + experiment['paramsFiles'].append(f"{experiment['paramsFolder']}{file}") -def isNotAnOutputfile(file) -> bool: return file not in outputFiles -def isNotAnInputfile(file) -> bool: return file not in inputFiles -def isNotAParamFile(file) -> bool: return file not in paramsFiles +def isNotAnOutputfile(file) -> bool: return file not in experiment['outputFiles'] +def isNotAnInputfile(file) -> bool: return file not in experiment['inputFiles'] +def isNotAParamFile(file) -> bool: return file not in experiment['paramsFiles'] def checkGeneratedFiles() -> None : - editedFiles = [ item.a_path for item in repository.index.diff(None) ]+ [ item.a_path for item in repository.index.diff(repository.head.name) ] + repository.untracked_files + repository = gitContent['repository'] + editedFiles = [ item.a_path for item in repository.index.diff(None) ] \ + +[ item.a_path for item in repository.index.diff(repository.head.name) ] \ + + repository.untracked_files outOfPlaceFiles = [] logFile = open("outOfPlaceFiles.log","w") for file in editedFiles: @@ -209,31 +184,16 @@ def checkGeneratedFiles() -> None : def writeInYaml() -> None: - if fileExists(EXPERIMENT_RESUME): - with open(EXPERIMENT_RESUME, "r") as yamlFile: - cur_yaml = yaml.safe_load(yamlFile) - cur_yaml.update({"name":experimentName}) - cur_yaml.update({"commands":commandsFile}) - cur_yaml.update({"inputs":inputFiles}) - cur_yaml.update({"outputs":outputFiles}) - cur_yaml.update({"params":paramsFiles}) - cur_yaml.update({"instruction":instructionFile}) - cur_yaml.update({"dockerfile":fileExists(DOCKERFILE)}) - checksums = {"checksums":genChecksums()} - cur_yaml.update(checksums) - with open(EXPERIMENT_RESUME, 'w') as yamlFile: - yaml.safe_dump(cur_yaml, yamlFile) - else: - with open(EXPERIMENT_RESUME, "w") as yamlFile: - yaml.safe_dump({"name":experimentName, "commands":commandsFile, "inputs":inputFiles, "outputs":outputFiles, "params":paramsFiles, "instruction":instructionFile, "dockerfile":fileExists(DOCKERFILE), "checksums":genChecksums()}, yamlFile) - + with open(EXPERIMENT_RESUME, "w") as file: + yaml.safe_dump(experiment, file) def pushBranch(version=1) -> None: print("Pushing branch...") + experimentName = experiment['name'] ; repository = gitContent['repository'] while f"{experimentName}Experiment{version}" in repository.remote().refs: print(f"{experimentName}Experiment{version} already exists") version += 1 - newTag = f"{currentTag}-e{version}" + newTag = f"{gitContent['currentTag']}-e{version}" print(f"creating {experimentName}Experiment{version} branch and pushing changes to it ...") repository.git.checkout(b=f"{experimentName}Experiment{version}") repository.git.add(all=True) @@ -253,7 +213,7 @@ def genChecksum(file) -> str : def genChecksums() -> list[dict]: checksums = [] - for file in outputFiles: + for file in experiment['outputFiles']: checksums.append({file : genChecksum(file)}) return checksums @@ -265,8 +225,9 @@ def askFolders() -> None : def reproduceExperiment() -> None: if dockerfileIsPresent() : - buildDockerImage() - runDockerImage() + buildDockerImage(experiment['name']) + runDockerImage(experiment['name'], getWorkir(DOCKERFILE)) + experiment['dockerfile'] = DOCKERFILE else: userInput = input("Do you have a pre-recorded commands file? (y/n)") if userInput == "y": @@ -279,16 +240,16 @@ def reproduceExperiment() -> None: done = input("Run your experiment and then type 'done' when you are done : ") def scanAfterExecution() -> None: - if inputFolder != None : + if experiment['inputFolder'] != None : scanInputFiles() - if outputFolder != None : + if experiment['outputFolder'] != None : scanOutputsGenerated() - if paramsFolder != None : + if experiment['paramsFolder'] != None : scanParameters() def run(folder) -> None : init(folder) - repository.active_branch.checkout() + gitContent['repository'].active_branch.checkout() askFolders() reproduceExperiment() scanAfterExecution() @@ -297,4 +258,4 @@ def run(folder) -> None : print(f"Please check the {EXPERIMENT_RESUME} file, if everything is correct, press enter to continue, otherwise type \"abort\"") if input() == "abort": raise Exception("Aborted") - pushBranch() + pushBranch() \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..2fa32ac --- /dev/null +++ b/utils.py @@ -0,0 +1,11 @@ + +def yesOrNo(question) -> bool: + while True: + answer = input(question + " (y/n): ") + if answer == 'y' or answer == 'n': + return answer == 'y' + +def typeEnterToContinue(text) -> None: + answer = "not empty" + while (answer != "") : + answer = input(text) \ No newline at end of file -- GitLab