From d5c0e3bb347edb5be28116bc0836c16f7ea4d138 Mon Sep 17 00:00:00 2001 From: BrunoDatoMeneses <bruno.dato.meneses@gmail.com> Date: Fri, 13 Sep 2019 18:54:15 +0200 Subject: [PATCH] ADD: full reinforcement xp with two ellsas --- AMOEBAonAMAK/src/agents/head/Head.java | 87 ++++++++++++++++--- ...Reinforcement1DSpatialRewardAndAction.java | 54 ++++++------ ...Reinforcement2DSpatialRewardAndAction.java | 2 + AMOEBAonAMAK/src/gui/ContextRendererFX.java | 6 +- AMOEBAonAMAK/src/kernel/AMOEBA.java | 11 +-- AMOEBAonAMAK/src/kernel/AmoebaData.java | 1 + 6 files changed, 116 insertions(+), 45 deletions(-) diff --git a/AMOEBAonAMAK/src/agents/head/Head.java b/AMOEBAonAMAK/src/agents/head/Head.java index 5b1bbcbe..ce3ab157 100644 --- a/AMOEBAonAMAK/src/agents/head/Head.java +++ b/AMOEBAonAMAK/src/agents/head/Head.java @@ -111,14 +111,19 @@ public class Head extends AmoebaAgent { if(getAmas().isReinforcement()) { + + int nb=0; Double meanNeighborsLastPredictions = null; + ArrayList<Context> usedNeighbors = new ArrayList<Context>(); + if(activatedNeighborsContexts.size()>0) { - int nb=0; + meanNeighborsLastPredictions = 0.0; for (Context ctxt : activatedNeighborsContexts) { if(ctxt.lastPrediction != null) { + usedNeighbors.add(ctxt); meanNeighborsLastPredictions += ctxt.lastPrediction; nb++; } @@ -133,7 +138,23 @@ public class Head extends AmoebaAgent { } if(meanNeighborsLastPredictions != null) { getAmas().data.oracleValue = (getAmas().data.oracleValue + meanNeighborsLastPredictions)/2; + + if(getAmas().data.oracleValue>0) { + + System.out.println("#################################################################"); + System.out.println("PCT " + getAmas().getPerceptionsAndActionState()); + System.out.println(getAmas().data.oracleValue); + for(Context ctxt : usedNeighbors) { + System.out.println(ctxt.getName() + " " + ctxt.lastPrediction); + } + System.out.println(usedNeighbors.size() + " " + nb); + + + } } + + + } @@ -182,11 +203,9 @@ public class Head extends AmoebaAgent { getAmas().data.executionTimes[1]=System.currentTimeMillis()- getAmas().data.executionTimes[1]; getAmas().data.executionTimes[2]=System.currentTimeMillis(); - if(getAmas().isReinforcement()) { - selfAnalysationOfContextsReinforcement(); - }else { - selfAnalysationOfContexts4(); - } + + selfAnalysationOfContexts4(); + getAmas().data.executionTimes[2]=System.currentTimeMillis()- getAmas().data.executionTimes[2]; getEnvironment().trace(TRACE_LEVEL.DEBUG, new ArrayList<String>(Arrays.asList("bestContext != null 2", "" + (bestContext != null)))); @@ -233,12 +252,24 @@ public class Head extends AmoebaAgent { } globalConfidence = globalConfidence / getAmas().getContexts().size(); + if (activatedNeighborsContexts.size() > 1) { - + + double bestNeighborLastPrediction = Double.NEGATIVE_INFINITY; + Context bestNeighbor = null; int i = 1; for (Context ctxt : activatedNeighborsContexts) { + + + if(ctxt.lastPrediction> bestNeighborLastPrediction) { + bestNeighborLastPrediction = ctxt.lastPrediction; + bestNeighbor = ctxt; + } + + + for (Context otherCtxt : activatedNeighborsContexts.subList(i, activatedNeighborsContexts.size())) { @@ -259,8 +290,17 @@ public class Head extends AmoebaAgent { } i++; + + + } + + getAmas().data.higherNeighborLastPredictionPercepts = new HashMap<String, Double>(); + for(Percept pct : getAmas().getPercepts()) { + getAmas().data.higherNeighborLastPredictionPercepts.put(pct.getName(),bestNeighbor.getRanges().get(pct).getCenter()); + } + } @@ -973,14 +1013,10 @@ public class Head extends AmoebaAgent { double currentDistanceToOraclePrediction; double minDistanceToOraclePrediction = Double.POSITIVE_INFINITY; - if(getAmas().data.oracleValue > 0) { - System.out.println("CYCLE " + getAmas().getCycle() + " ORACLE " + getAmas().data.oracleValue + " -------- HEAD 136"); - } + for (Context activatedContext : activatedContexts) { - if(getAmas().data.oracleValue > 0) { - System.out.println(activatedContext.getName()); - } + currentDistanceToOraclePrediction = activatedContext.getLocalModel() .distance(activatedContext.getCurrentExperiment()); @@ -1057,7 +1093,11 @@ public class Head extends AmoebaAgent { double minDistanceToOraclePrediction = Double.POSITIVE_INFINITY; for (Context activatedContext : activatedContexts) { - //System.out.println(activatedContext.getName()); + + + + + currentDistanceToOraclePrediction = activatedContext.getLocalModel() .distance(activatedContext.getCurrentExperiment()); getAmas().data.distanceToRegression = currentDistanceToOraclePrediction; @@ -1068,12 +1108,25 @@ public class Head extends AmoebaAgent { if (!activatedContext.getLocalModel().finishedFirstExperiments()) { activatedContext.getLocalModel().updateModel(activatedContext.getCurrentExperiment(), getAmas().data.learningSpeed); getAmas().data.contextNotFinished = true; + + if(getAmas().data.oracleValue>0) { + + System.out.println(activatedContext.getName()); + + + } } else if (currentDistanceToOraclePrediction < getAverageRegressionPerformanceIndicator()) { //else if (currentDistanceToOraclePrediction < regressionPerformance.getPerformanceIndicator()) { activatedContext.getLocalModel().updateModel(activatedContext.getCurrentExperiment(), getAmas().data.learningSpeed); + if(getAmas().data.oracleValue>0) { + + System.out.println(activatedContext.getName()); + + + } } if (currentDistanceToOraclePrediction < minDistanceToOraclePrediction) { @@ -1474,6 +1527,12 @@ public class Head extends AmoebaAgent { this.getAmas().data.criticity = criticity; } + + public HashMap<String, Double> getHigherNeighborLastPredictionPercepts() { + return getAmas().data.higherNeighborLastPredictionPercepts; + } + + /** * Gets the action. * diff --git a/AMOEBAonAMAK/src/experiments/SimpleReinforcement1DSpatialRewardAndAction.java b/AMOEBAonAMAK/src/experiments/SimpleReinforcement1DSpatialRewardAndAction.java index 09b1c3c3..1214086e 100644 --- a/AMOEBAonAMAK/src/experiments/SimpleReinforcement1DSpatialRewardAndAction.java +++ b/AMOEBAonAMAK/src/experiments/SimpleReinforcement1DSpatialRewardAndAction.java @@ -32,7 +32,7 @@ import utils.XmlConfigGenerator; public abstract class SimpleReinforcement1DSpatialRewardAndAction { /* Learn and Test */ public static final int MAX_STEP_PER_EPISODE = 200; - public static final int N_LEARN = 400;//400 + public static final int N_LEARN = 1000;//400 public static final int N_TEST = 100; /* Exploration */ @@ -104,32 +104,37 @@ public abstract class SimpleReinforcement1DSpatialRewardAndAction { */ public static class AmoebaQL implements LearningAgent { public AMOEBA amoebaSpatialReward; - public AMOEBA amoebaControlModel; + //public AMOEBA amoebaControlModel; public double lr = 0.8; public double gamma = 0.9; private Random rand = new Random(); public AmoebaQL() { amoebaSpatialReward = setupSpatialReward(); - amoebaControlModel = setupControlModel(); + //amoebaControlModel = setupControlModel(); } @Override public HashMap<String, Double> choose(HashMap<String, Double> state, Environment env) { -// HashMap<String, Double> bestActions = amoebaSpatialReward.maximize(state); -// double a1 = bestActions.getOrDefault("a1", 0.0); -// double a2 = bestActions.getOrDefault("a2", 0.0); -// if(a1 == 0.0) { -// a1 = rand.nextBoolean() ? -1 : 1; -// } -// if(a2 == 0.0) { -// a2 = rand.nextBoolean() ? -1 : 1; -// } - +// HashMap<String, Double> stateWithVizuAdded = new HashMap<String, Double>(state); +// stateWithVizuAdded.put("p2", 0.0); +// stateWithVizuAdded.put("oracle", 0.0); +// HashMap<String, Double> bestFuturePosition = amoebaSpatialReward.reinforcementRequest(stateWithVizuAdded); +// // HashMap<String, Double> action = new HashMap<String, Double>(); -// action.put("a1", a1); -// action.put("a2", a2); +// if(bestFuturePosition!=null) { +// HashMap<String, Double> requestForControlModel = new HashMap<String, Double>(); +// requestForControlModel.put("pCurrent", state.get("p1")); +// requestForControlModel.put("pGoal", bestFuturePosition.get("p1")); +// +// double bestAction = amoebaControlModel.request(requestForControlModel); +// +// +// action.put("a1", bestAction); +// } +// action = env.randomAction(); +// // return action; return null; } @@ -149,12 +154,11 @@ public abstract class SimpleReinforcement1DSpatialRewardAndAction { - positionAndReward.put("p2",0.0); - System.out.println("ControlModel " + previousStateCurrentStateAction + " ---------------- SIMPLE REIN XP 149"); - System.out.println("SpatialReward " + positionAndReward + " ---------------- SIMPLE REIN XP 149"); + //System.out.println("ControlModel " + previousStateCurrentStateAction + " ---------------- SIMPLE REIN XP 149"); + //System.out.println("SpatialReward " + positionAndReward + " ---------------- SIMPLE REIN XP 149"); amoebaSpatialReward.learn(positionAndReward); - amoebaControlModel.learn(previousStateCurrentStateAction); + //amoebaControlModel.learn(previousStateCurrentStateAction); } @@ -352,7 +356,6 @@ public abstract class SimpleReinforcement1DSpatialRewardAndAction { private static AMOEBA setup() { ArrayList<Pair<String, Boolean>> sensors = new ArrayList<>(); sensors.add(new Pair<String, Boolean>("p1", false)); - sensors.add(new Pair<String, Boolean>("p2", false)); File config; try { config = File.createTempFile("config", "xml"); @@ -380,7 +383,6 @@ public abstract class SimpleReinforcement1DSpatialRewardAndAction { private static AMOEBA setupSpatialReward() { ArrayList<Pair<String, Boolean>> sensors = new ArrayList<>(); sensors.add(new Pair<String, Boolean>("p1", false)); - sensors.add(new Pair<String, Boolean>("p2", false)); File config; try { config = File.createTempFile("configSpatialReward", "xml"); @@ -398,13 +400,15 @@ public abstract class SimpleReinforcement1DSpatialRewardAndAction { amoeba.saver = new SaveHelperDummy(); - for(Percept pct : amoeba.getPercepts()) { - pct.setMax(10); - pct.setMin(-10); - } +// for(Percept pct : amoeba.getPercepts()) { +// pct.setMax(10); +// pct.setMin(-10); +// } amoeba.setLocalModel(TypeLocalModel.MILLER_REGRESSION); amoeba.getEnvironment().setMappingErrorAllowed(0.025); + amoeba.setReinforcement(true); + return amoeba; } diff --git a/AMOEBAonAMAK/src/experiments/SimpleReinforcement2DSpatialRewardAndAction.java b/AMOEBAonAMAK/src/experiments/SimpleReinforcement2DSpatialRewardAndAction.java index a43d156f..b526e211 100644 --- a/AMOEBAonAMAK/src/experiments/SimpleReinforcement2DSpatialRewardAndAction.java +++ b/AMOEBAonAMAK/src/experiments/SimpleReinforcement2DSpatialRewardAndAction.java @@ -384,6 +384,8 @@ public abstract class SimpleReinforcement2DSpatialRewardAndAction { pct.setMin(-10); } + amoeba.setReinforcement(true); + return amoeba; } diff --git a/AMOEBAonAMAK/src/gui/ContextRendererFX.java b/AMOEBAonAMAK/src/gui/ContextRendererFX.java index 380baf96..23f41602 100644 --- a/AMOEBAonAMAK/src/gui/ContextRendererFX.java +++ b/AMOEBAonAMAK/src/gui/ContextRendererFX.java @@ -67,7 +67,11 @@ public class ContextRendererFX extends RenderStrategy { g = 0.0; b = 1.0; } - + if(context.lastPrediction > 10000) { + r = 1.0; + g = 1.0; + b = 0.0; + } drawable.setColor(new Color(r, g, b, 200d / 255d)); diff --git a/AMOEBAonAMAK/src/kernel/AMOEBA.java b/AMOEBAonAMAK/src/kernel/AMOEBA.java index 48e1c2d5..3eef319e 100644 --- a/AMOEBAonAMAK/src/kernel/AMOEBA.java +++ b/AMOEBAonAMAK/src/kernel/AMOEBA.java @@ -181,7 +181,7 @@ public class AMOEBA extends Amas<World> implements IAMOEBA { @Override protected void onSystemCycleBegin() { if (cycle % 1000 == 0) { - Log.defaultLog.inform("AMOEBA", "Cycle " + cycle + ". Nb agents: "+getAgents().size()); + //Log.defaultLog.inform("AMOEBA", "Cycle " + cycle + ". Nb agents: "+getAgents().size()); } if(isRenderUpdate()) { @@ -218,6 +218,7 @@ public class AMOEBA extends Amas<World> implements IAMOEBA { toKillContexts.clear(); lastModifiedContext.clear(); alteredContexts.clear(); + data.higherNeighborLastPredictionPercepts=null; } synchronized private void incrementCycleWithoutRender() { @@ -393,7 +394,7 @@ public class AMOEBA extends Amas<World> implements IAMOEBA { } - public double reinfocementRequest(HashMap<String, Double> perceptionsActionState) { + public HashMap<String, Double> reinforcementRequest(HashMap<String, Double> perceptionsActionState) { boolean usingOracle = isUseOracle(); if (usingOracle) head.changeOracleConnection(); @@ -404,7 +405,7 @@ public class AMOEBA extends Amas<World> implements IAMOEBA { if (usingOracle) head.changeOracleConnection(); studiedSystem = ss; - return getAction(); + return getHigherNeighborLastPredictionPercepts(); } @Override @@ -550,8 +551,8 @@ public class AMOEBA extends Amas<World> implements IAMOEBA { - public double getHigherNeighborPrediction() { - return head.getAction(); + public HashMap<String, Double> getHigherNeighborLastPredictionPercepts() { + return head.getHigherNeighborLastPredictionPercepts(); } public ArrayList<Context> getContexts() { diff --git a/AMOEBAonAMAK/src/kernel/AmoebaData.java b/AMOEBAonAMAK/src/kernel/AmoebaData.java index d843af0d..56c05034 100644 --- a/AMOEBAonAMAK/src/kernel/AmoebaData.java +++ b/AMOEBAonAMAK/src/kernel/AmoebaData.java @@ -19,6 +19,7 @@ public class AmoebaData implements Serializable { public int numberOfCriticityValuesForAverageforVizualisation = 300; public Double prediction; + public HashMap<String, Double> higherNeighborLastPredictionPercepts = null; public Double endogenousPredictionActivatedContextsOverlaps = 0.0; public Double endogenousPredictionActivatedContextsOverlapsWorstDimInfluence = 0.0; public Double endogenousPredictionActivatedContextsOverlapsInfluenceWithoutConfidence = 0.0; -- GitLab