From 6cb6c182d0fdfa62c5f92f79297fe421f9f791c4 Mon Sep 17 00:00:00 2001
From: AxelCarayon <axel.carayon@gmail.com>
Date: Tue, 8 Mar 2022 11:05:02 +0100
Subject: [PATCH] Refactor: extract Docker and prompt helpers, group module state into dicts

---
 dockerModule.py       |  23 ++++++
 loadExperiment.py     |  94 ++++++-----------------
 registerExperiment.py | 171 ++++++++++++++++--------------------------
 utils.py              |  11 +++
 4 files changed, 121 insertions(+), 178 deletions(-)
 create mode 100644 dockerModule.py
 create mode 100644 utils.py

diff --git a/dockerModule.py b/dockerModule.py
new file mode 100644
index 0000000..6e01962
--- /dev/null
+++ b/dockerModule.py
@@ -0,0 +1,50 @@
+"""Helpers for building and running the Docker image of an experiment."""
+import os
+import subprocess
+
+
+def getWorkir(dockerFile) -> str :
+    """Return the last WORKDIR declared in ``dockerFile``, or "/" if there is none."""
+    workdir = "/"
+    with open(dockerFile,"r") as file:
+        for line in file:
+            # split() tolerates indentation, tabs and repeated spaces, and a bare
+            # "WORKDIR" no longer raises IndexError. Instruction names are
+            # case-insensitive in a Dockerfile.
+            tokens = line.split()
+            if len(tokens) >= 2 and tokens[0].upper() == "WORKDIR":
+                workdir = tokens[1]
+    return workdir
+
+
+def _imageTag(imageName) -> str:
+    """Return the image tag shared by the build and run steps."""
+    return f"{imageName.lower()}experiment"
+
+
+def _runDocker(arguments) -> None:
+    """Run ``docker <arguments>``, retrying once through sudo if that fails.
+
+    Raises subprocess.CalledProcessError when the sudo attempt fails too.
+    """
+    command = ["docker", *arguments]
+    try:
+        # An argument list (no shell) stops an experiment name read from a
+        # cloned repository from being interpreted as shell syntax.
+        subprocess.run(command, check=True)
+    except (subprocess.CalledProcessError, OSError):
+        # A bare except also swallowed KeyboardInterrupt and relaunched docker.
+        subprocess.run(["sudo", *command], check=True)
+
+
+def buildDockerImage(imageName) -> None:
+    """Build the image of ``imageName`` from the Dockerfile in the current directory."""
+    print("Building the docker image ...")
+    _runDocker(["build", "-t", _imageTag(imageName), "./"])
+
+
+def runDockerImage(imageName,workDirectory) -> None:
+    """Run the image interactively, bind-mounting the current directory on ``workDirectory``."""
+    print("binding docker image to the current directory and running it...")
+    mount = f"type=bind,source={os.getcwd()},target={workDirectory}"
+    _runDocker(["run", "-it", "--mount", mount, _imageTag(imageName)])
\ No newline at end of file
diff --git a/loadExperiment.py b/loadExperiment.py
index fa03e69..55d53e7 100644
--- a/loadExperiment.py
+++ b/loadExperiment.py
@@ -4,57 +4,30 @@ import subprocess
 import yaml
 import hashlib
 import warnings
+from utils import typeEnterToContinue
+from dockerModule import getWorkir, buildDockerImage, runDockerImage
 
-INPUT_FOLDER = "inputs"
-OUTPUT_FOLDER = "outputs"
-
-repo = None
-folder = None
-
-commandsFile = None
-instructionFile = None
-
-inputFiles = []
-outputFiles = []
-
-dockerfileIsPresent = False
-beforeHash = None
+params = None
 
 def init(repository,branch) -> None :
-    global repo, folder
     folder = repository.split('/')[-1].split('.')[0]
-    if os.path.exists(folder) :
-        print(f"Folder ./{folder} already exists, do you want to delete it ? (y/n)")
-        answer = input()
-        if answer == 'y' :
-            os.system(f"rm -rf ./{folder}")
-        else :
-            print("Aborting")
-            exit(0)
-    git.Git("./").clone(repository)
-    repo = git.Repo(folder)
+    if not os.path.exists(folder) :
+        git.Git("./").clone(repository)
     try : 
-        repo.git.checkout(branch)
+        git.Repo(folder).git.checkout(branch)
     except git.exc.GitCommandError : 
         raise Exception(f"Branch {branch} not found in the repository")
     os.chdir(folder)
 
 def getParameters() -> None :
-    global commandsFile, inputFiles, outputFiles, beforeHash, instructionFile, dockerfileIsPresent
+    global params
     if not (os.path.exists('experimentResume.yaml')):
         raise Exception("No exeperimentResume.yaml file found, the branch is not an exeperiment")
     with open('experimentResume.yaml', 'r') as stream:
-        parameters = yaml.safe_load(stream)
-        commandsFile = parameters.get('commands')
-        outputFiles = parameters.get('outputs')
-        inputFiles = parameters.get('inputs')
-        beforeHash = parameters.get('checksums')
-        instructionFile = parameters.get('instructions')
-        dockerfileIsPresent = parameters.get('dockerfile')
-
+        params = yaml.safe_load(stream)
 
 def runExperiment() -> None :
-    file = open(commandsFile, "r")
+    file = open(params.get('commands'), "r")
     for line in file.read().splitlines():
         print(f"running {line} ...")
         process = subprocess.run(line,shell=True)
@@ -62,14 +35,11 @@ def runExperiment() -> None :
         print("done")
 
 def checkForInstructions() -> None :
-    if (instructionFile != None) :
-        print("You can check the instructions for the experiment in the file : " + instructionFile)
+    if (params.get('instruction') != None) :
+        print("You can check the instructions for the experiment in the file : " + params.get('instruction'))
     else :
         warnings.warn("No instructions for the experiment found in the repository")
-    print("Run the exepriment and then press enter when it's done")
-    done = "nope"
-    while (done != "") :
-        done = input()
+    typeEnterToContinue("Run the exepriment and then press enter when it's done")
 
 def genChecksum(file) -> str :
     hash_md5 = hashlib.md5()
@@ -80,57 +50,35 @@ def genChecksum(file) -> str :
 
 def genChecksums() -> list[dict]:
     checksums = []
-    for file in os.listdir(OUTPUT_FOLDER) :
+    outputFolder = params.get('outputFolder')
+    for file in os.listdir(outputFolder) :
         if not file.endswith(".gitkeep"):
-            checksums.append({f"{OUTPUT_FOLDER}/{file}" : genChecksum(f'{OUTPUT_FOLDER}/{file}')})
+            checksums.append({os.path.join(outputFolder, file) : genChecksum(os.path.join(outputFolder, file))})  # outputFolder may already end with "/"
     return checksums
 
 
 def compareChecksums() -> bool:
     changes = False
-    for (dict1, dict2) in zip(beforeHash, genChecksums()):
+    for (dict1, dict2) in zip(params.get('checksums'), genChecksums()):
         for (key, value) in dict1.items():
             if dict2.get(key) != value :
                 warnings.warn(f"{key} has changed")
                 changes = True
     return changes
 
-
-def buildDockerImage() -> None:
-    print("Building the docker image ...")
-    try :
-        subprocess.run(f"docker build -t experimentreproduction ./",shell=True).check_returncode()
-    except :
-        subprocess.run(f"sudo docker build -t experimentreproduction ./",shell=True).check_returncode()
-
-def getWorkir() -> str :
-    workdir = "/" 
-    with open("Dockerfile","r") as file:
-        for line in file.read().splitlines():
-            if line.startswith("WORKDIR"):
-                workdir = line.split(" ")[1]
-    return workdir
-
-def runDockerImage() -> None:
-    print("binding docker image to the current directory and running it...")
-    try:
-        subprocess.run(f"docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} experimentreproduction",shell=True).check_returncode()
-    except :
-        subprocess.run(f"sudo docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} experimentreproduction",shell=True).check_returncode()
-
-
 def run(repository, branch) -> None :
     print("Initializing the experiment repository ...")
     init(repository, branch)
     print("Getting the experiment parameters ...")
     getParameters()
     print("Running the experiment ...")
-    if (dockerfileIsPresent) :
+    if (params.get('dockerfile')) :
         print("Dockerimage was found ! Using it to run the experiment...")
-        buildDockerImage()
-        runDockerImage()
+        name = params.get("name")
+        buildDockerImage(name)
+        runDockerImage(name,getWorkir("Dockerfile"))
     else:
-        if (commandsFile != None) : 
+        if (params.get('commands') != None) : 
             runExperiment()
         else :
             checkForInstructions()
diff --git a/registerExperiment.py b/registerExperiment.py
index e763be8..4755742 100644
--- a/registerExperiment.py
+++ b/registerExperiment.py
@@ -1,36 +1,37 @@
-from multiprocessing.connection import answer_challenge
 import os
-from unicodedata import name
 import git
 import subprocess
 import yaml
 import hashlib
 import warnings
+from dockerModule import getWorkir, buildDockerImage, runDockerImage
+from utils import yesOrNo
 
 EXPERIMENT_RESUME = "experimentResume.yaml"
 DOCKERFILE = "Dockerfile"
 
 path = "./"
 
-repository = None
-
-inputFolder = None
-inputFiles = []
-
-paramsFolder = None
-paramsFiles = []
-
-commandsFile = None
-instructionFile = None
-
-experimentName = None
-
-outputFolder = None
-outputFiles = []
-
-currentTag = None
-tags = None
-
+gitContent = {
+    'repository' : None,
+    'branchName' : None,
+    'currentTag' : None,
+    'tags' : None
+}
+
+experiment = {
+    'name' : None,
+    'inputFolder' : None,
+    'inputFiles' : [],
+    'paramsFolder' : None,
+    'paramsFiles' : [],
+    'commandsFile' : None,
+    'instructionFile' : None,
+    'outputFolder' : None,
+    'outputFiles' : [],
+    'dockerfile' : None,
+    'checksums' : []
+}
 
 def isGitRepo(path) -> bool:
     try:
@@ -39,58 +40,32 @@ def isGitRepo(path) -> bool:
     except git.exc.InvalidGitRepositoryError:
         return False
 
-
-
 def dockerfileIsPresent() -> bool:
     if fileExists(DOCKERFILE):
-        answer = input("A dockerfile was found ! It will be used to reproduce the experiment. Is that ok for you ? (y/n)")
-        if answer == "n":
+        if yesOrNo("A dockerfile was found ! It will be used to reproduce the experiment. Is that ok for you ?"):
+            return True
+        else : 
             raise Exception("""Remove the dockerfile and try again""")
-        return True
     else :
         return False
 
-def buildDockerImage() -> None:
-    print("Building the docker image ...")
-    try :
-        subprocess.run(f"docker build -t {experimentName.lower()}experiment ./",shell=True).check_returncode()
-    except :
-        subprocess.run(f"sudo docker build -t {experimentName.lower()}experiment ./",shell=True).check_returncode()
-
-def getWorkir() -> str :
-    workdir = "/" 
-    with open(DOCKERFILE,"r") as file:
-        for line in file.read().splitlines():
-            if line.startswith("WORKDIR"):
-                workdir = line.split(" ")[1]
-    return workdir
-
-def runDockerImage() -> None:
-    print("binding docker image to the current directory and running it...")
-    try:
-        subprocess.run(f"docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} {experimentName.lower()}experiment",shell=True).check_returncode()
-    except :
-        subprocess.run(f"sudo docker run -it --mount type=bind,source=\"$PWD\",target={getWorkir()} {experimentName.lower()}experiment",shell=True).check_returncode()
-    #TODO : vérifier si la reproduction avec un Dockerfile marche dans l'autre sens
-
 def init(pathInput) -> None :
-    global repository,path,experimentName,tags, currentTag
     if isGitRepo(pathInput):
-        path += pathInput
+        path = pathInput
         if not (pathInput[len(pathInput)-1] == "/"):
             path+="/"
-        repository = git.Repo(path)
-        experimentName = repository.active_branch.name
+        gitContent['repository'] = git.Repo(path)
+        experiment["name"] = gitContent['repository'].active_branch.name
         os.chdir(path)
     else :
         raise Exception(f"{pathInput} is not a git repository")
-    tags = repository.tags
-    currentTag = repository.git.describe('--tags')
+    gitContent['tags'] = gitContent['repository'].tags
+    gitContent['currentTag'] = gitContent['repository'].git.describe('--tags')
     if not(currentVersionIsTagged()):
         raise Exception("Current version is not tagged, you can only reproduce an experiment from a tagged version.")
 
 def currentVersionIsTagged() -> bool:
-    return currentTag in tags
+    return gitContent['currentTag'] in gitContent['tags']
 
 def fileExists(fileName) -> bool:
     return os.path.exists(fileName)
@@ -99,20 +74,18 @@ def folderExists(folderName) -> bool:
     return os.path.isdir(folderName)
 
 def askForInputFolder() -> None:
-    global inputFolder
     answer = input("If you use input data, where are they stored ? Give the path from the root of the repository : ")
     if answer == "":
         warnings.warn("No input folder given, no input files will be registered")
     else:
         if not folderExists(answer):
-            raise Exception(f"{path}/{answer} folder does not exist")
+            raise Exception(f"{answer} folder does not exist")
         else:
             if not answer.endswith("/"):
                 answer+="/"
-            inputFolder = answer
+            experiment['inputFolder'] = answer
 
 def askForOutputFolder() -> None:
-    global outputFolder
     answer = input("Where are the outputs generated ? Give the path from the root of the repository : ")
     if answer == "":
         warnings.warn("No output folder given, no output files will be registered")
@@ -122,29 +95,27 @@ def askForOutputFolder() -> None:
         else:
             if not answer.endswith("/"):
                 answer+="/"
-            outputFolder = answer
+            experiment['outputFolder'] = answer
 
 def askForParamsFolder() -> None:
-    global paramsFolder
     answer = input("In which folder do you store your parameters ? Give the path from the root of the repository : ")
     if answer == "":
         warnings.warn("No parameters folder given, no parameters will be registered")
     else:
         if not folderExists(answer):
-            raise Exception(f"{path}/{answer} folder does not exist")
+            raise Exception(f"{answer} folder does not exist")
         else:
             if not answer.endswith("/"):
                 answer+="/"
-            paramsFolder = answer
+            experiment['paramsFolder'] = answer
 
 def askForCommandsFile() -> None:
-    global commandsFile
     commandsFile = input("Enter the name of the commands file: ")
     if not fileExists(commandsFile):
         raise Exception(f"{commandsFile} file does not exist")
+    experiment['commandsFile'] = commandsFile
 
 def askForInstructionFile() -> None :
-    global instructionFile
     print("If you have an instruction file, enter its name, otherwise press enter")
     instructionFile = input()
     if instructionFile == "":
@@ -152,16 +123,17 @@ def askForInstructionFile() -> None :
     else:
         if not fileExists(instructionFile):
             raise Exception(f"{instructionFile} file does not exist")
+        experiment['instructionFile'] = instructionFile
 
 def registeringExperimentInputs(inputs) -> None:
-    with open(commandsFile, "w") as file:
+    with open(experiment['commandsFile'], "w") as file:
         for input in inputs:
             file.write(input+"\n")
 
 
 def runExperiment() -> None:
     print("Trying to run experiment")
-    file = open(commandsFile, "r")
+    file = open(experiment['commandsFile'], "r")
     for line in file.read().splitlines():
         print(f"running {line} ...")
         process = subprocess.run(line,shell=True)
@@ -169,26 +141,29 @@ def runExperiment() -> None:
         print("done")
 
 def scanInputFiles() -> None:
-    for file in os.listdir(inputFolder):
+    for file in os.listdir(experiment['inputFolder']):
         if not file.endswith(".gitkeep"):
-            inputFiles.append(f"{inputFolder}{file}")
+            experiment['inputFiles'].append(f"{experiment['inputFolder']}{file}")
 
 def scanOutputsGenerated() -> None:
-    for file in os.listdir(outputFolder):
+    for file in os.listdir(experiment['outputFolder']):
         if not file.endswith(".gitkeep"):
-            outputFiles.append(f"{outputFolder}{file}")
+            experiment['outputFiles'].append(f"{experiment['outputFolder']}{file}")
 
 def scanParameters() -> None:
-    for file in os.listdir(paramsFolder):
+    for file in os.listdir(experiment['paramsFolder']):
         if not file.endswith(".gitkeep"):
-            paramsFiles.append(f"{paramsFolder}{file}")
+            experiment['paramsFiles'].append(f"{experiment['paramsFolder']}{file}")
 
-def isNotAnOutputfile(file) -> bool: return file not in outputFiles
-def isNotAnInputfile(file) -> bool: return file not in inputFiles
-def isNotAParamFile(file) -> bool: return file not in paramsFiles
+def isNotAnOutputfile(file) -> bool: return file not in experiment['outputFiles']
+def isNotAnInputfile(file) -> bool: return file not in experiment['inputFiles']
+def isNotAParamFile(file) -> bool: return file not in experiment['paramsFiles']
 
 def checkGeneratedFiles() -> None : 
-    editedFiles = [ item.a_path for item in repository.index.diff(None) ]+ [ item.a_path for item in repository.index.diff(repository.head.name) ] + repository.untracked_files
+    repository = gitContent['repository']
+    editedFiles = [ item.a_path for item in repository.index.diff(None) ] \
+    +[ item.a_path for item in repository.index.diff(repository.head.name) ] \
+    + repository.untracked_files
     outOfPlaceFiles = []
     logFile = open("outOfPlaceFiles.log","w")
     for file in editedFiles:
@@ -209,31 +184,16 @@ def checkGeneratedFiles() -> None :
 
 
 def writeInYaml() -> None:
-    if fileExists(EXPERIMENT_RESUME):
-        with open(EXPERIMENT_RESUME, "r") as yamlFile:
-            cur_yaml = yaml.safe_load(yamlFile)
-            cur_yaml.update({"name":experimentName})
-            cur_yaml.update({"commands":commandsFile})
-            cur_yaml.update({"inputs":inputFiles})
-            cur_yaml.update({"outputs":outputFiles})
-            cur_yaml.update({"params":paramsFiles})
-            cur_yaml.update({"instruction":instructionFile})
-            cur_yaml.update({"dockerfile":fileExists(DOCKERFILE)})
-            checksums = {"checksums":genChecksums()}
-            cur_yaml.update(checksums)
-        with open(EXPERIMENT_RESUME, 'w') as yamlFile:
-            yaml.safe_dump(cur_yaml, yamlFile)
-    else:
-        with open(EXPERIMENT_RESUME, "w") as yamlFile:
-            yaml.safe_dump({"name":experimentName, "commands":commandsFile, "inputs":inputFiles, "outputs":outputFiles, "params":paramsFiles, "instruction":instructionFile, "dockerfile":fileExists(DOCKERFILE), "checksums":genChecksums()}, yamlFile)
-
+    with open(EXPERIMENT_RESUME, "w") as file:  # also write the key names loadExperiment.py reads, plus the output checksums
+        yaml.safe_dump({**experiment, 'commands' : experiment['commandsFile'], 'instruction' : experiment['instructionFile'], 'checksums' : genChecksums()}, file)
 
 def pushBranch(version=1) -> None:
     print("Pushing branch...")
+    experimentName = experiment['name'] ; repository = gitContent['repository']
     while f"{experimentName}Experiment{version}" in repository.remote().refs:
         print(f"{experimentName}Experiment{version} already exists")
         version += 1
-    newTag = f"{currentTag}-e{version}"
+    newTag = f"{gitContent['currentTag']}-e{version}"
     print(f"creating {experimentName}Experiment{version} branch and pushing changes to it ...")
     repository.git.checkout(b=f"{experimentName}Experiment{version}")
     repository.git.add(all=True)
@@ -253,7 +213,7 @@ def genChecksum(file) -> str :
 
 def genChecksums() -> list[dict]:
     checksums = []
-    for file in outputFiles:
+    for file in experiment['outputFiles']:
         checksums.append({file : genChecksum(file)})
     return checksums
 
@@ -265,8 +225,9 @@ def askFolders() -> None :
 
 def reproduceExperiment() -> None:
     if dockerfileIsPresent() :
-        buildDockerImage()
-        runDockerImage()
+        buildDockerImage(experiment['name'])
+        runDockerImage(experiment['name'], getWorkir(DOCKERFILE))
+        experiment['dockerfile'] = DOCKERFILE
     else:
         userInput = input("Do you have a pre-recorded commands file? (y/n)")
         if userInput == "y":
@@ -279,16 +240,16 @@ def reproduceExperiment() -> None:
                 done = input("Run your experiment and then type 'done' when you are done : ")
 
 def scanAfterExecution() -> None:
-    if inputFolder != None :
+    if experiment['inputFolder'] != None :
         scanInputFiles()
-    if outputFolder != None :
+    if experiment['outputFolder'] != None :
         scanOutputsGenerated()
-    if paramsFolder != None :
+    if experiment['paramsFolder'] != None :
         scanParameters()
 
 def run(folder) -> None :
     init(folder)
-    repository.active_branch.checkout()
+    gitContent['repository'].active_branch.checkout()
     askFolders()
     reproduceExperiment()
     scanAfterExecution()
@@ -297,4 +258,4 @@ def run(folder) -> None :
     print(f"Please check the {EXPERIMENT_RESUME} file, if everything is correct, press enter to continue, otherwise type \"abort\"")
     if input() == "abort":
         raise Exception("Aborted")
-    pushBranch()
+    pushBranch()
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..2fa32ac
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,19 @@
+"""Small console prompts shared by the experiment scripts."""
+
+
+def yesOrNo(question) -> bool:
+    """Ask ``question`` until the user answers yes or no; return True for yes.
+
+    Answers are compared after stripping whitespace and lowering case, so
+    "Y" or " n" are accepted instead of silently re-prompting.
+    """
+    while True:
+        answer = input(question + " (y/n): ").strip().lower()
+        if answer in ("y", "n"):
+            return answer == "y"
+
+
+def typeEnterToContinue(text) -> None:
+    """Show ``text`` repeatedly until the user submits an empty line."""
+    while input(text) != "":
+        pass
\ No newline at end of file
-- 
GitLab