Commit 8f18ffc5 authored by BrunoDatoMeneses

ADD: reward propagation with neighbors

parent 061af960
1 merge request: !4 Exp rein
@@ -774,6 +774,17 @@ public class Context extends AmoebaAgent {
        return exp;
    }
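    /** Builds an artificial experiment located at the center of this context's ranges (one value per percept). */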
+    public Experiment getArtificialExperiment() {
+        ArrayList<Percept> percepts = getAmas().getPercepts();
+        Experiment exp = new Experiment(this);
+        for (Percept pct : percepts) {
+            exp.addDimension(pct, this.getRanges().get(pct).getCenter());
+        }
+        return exp;
+    }

    public double sumOfRangesLengths() {
        double sum = 0;
@@ -1397,6 +1408,7 @@ public class Context extends AmoebaAgent {
        s += "Model "+this.localModel.getType()+" :";
        s += this.localModel.getCoefsFormula() + "\n";
        s += "Last Predicition " + lastPrediction +"\n";
+       s += "\n";
        s += "Ranges :\n";
@@ -13,6 +13,7 @@ import java.util.Queue;
import agents.AmoebaAgent;
import agents.context.Context;
import agents.context.CustomComparator;
+import agents.context.Experiment;
import agents.percept.Percept;
import kernel.AMOEBA;
import ncs.NCS;
@@ -180,6 +181,8 @@ public class Head extends AmoebaAgent {
        //NCSDetection_PotentialRequest();
+       //predictionPropagation();
        criticalities.addCriticality("spatialCriticality",
                (getMinMaxVolume() - getVolumeOfAllContexts()) / getMinMaxVolume());
@@ -692,6 +695,38 @@ public class Head extends AmoebaAgent {
    }
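    /**
     * Propagates the best context's prediction to its activated neighbors: any neighbor whose
     * last prediction differs from the best context's by more than 10 has its local model
     * updated with the best context's current experiment (learning weight 0.5), and its
     * last prediction is then refreshed from its own action proposal.
     */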
+    private void predictionPropagation() {
+        getEnvironment().trace(TRACE_LEVEL.DEBUG, new ArrayList<String>(Arrays.asList("------------------------------------------------------------------------------------"
+                + "---------------------------------------- PREDICTION PROPAGATION")));
+        if(bestContext != null) {
+            for(Context ctxt : activatedNeighborsContexts) {
+                if(ctxt != bestContext) {
+                    if(Math.abs(ctxt.lastPrediction - bestContext.lastPrediction)>10) {
+                        Experiment propagationExperiment = bestContext.getCurrentExperiment();
+                        propagationExperiment.setOracleProposition(bestContext.lastPrediction);
+                        ctxt.getLocalModel().updateModel(propagationExperiment, 0.5);
+                        ctxt.lastPrediction = ctxt.getActionProposal();
+                    }
+                }
+            }
+        }
+    }
@@ -894,7 +929,7 @@ public class Head extends AmoebaAgent {
        double minDistanceToOraclePrediction = Double.POSITIVE_INFINITY;
        for (Context activatedContext : activatedContexts) {
-           System.out.println(activatedContext.getName());
+           //System.out.println(activatedContext.getName());
            currentDistanceToOraclePrediction = activatedContext.getLocalModel()
                    .distance(activatedContext.getCurrentExperiment());
            getAmas().data.distanceToRegression = currentDistanceToOraclePrediction;
@@ -31,7 +31,7 @@ import utils.XmlConfigGenerator;
public abstract class SimpleReinforcement {
    /* Learn and Test */
    public static final int MAX_STEP_PER_EPISODE = 200;
-   public static final int N_LEARN = 400;
+   public static final int N_LEARN = 100;
    public static final int N_TEST = 100;
    /* Exploration */
@@ -50,7 +50,7 @@ public abstract class SimpleReinforcement {
        learning(new QLearning());
        System.out.println("----- END QLEARNING -----");*/
        ArrayList<ArrayList<Double>> results = new ArrayList<>();
-       for(int i = 0; i < 100; i++) {
+       for(int i = 0; i < 1; i++) {
            //LearningAgent agent = new QLearning();
            LearningAgent agent = new AmoebaQL();
            //LearningAgent agent = new AmoebaCoop();
@@ -69,7 +69,7 @@ public abstract class SimpleReinforcement {
            System.out.println(""+i+"\t"+average);
        }
-       System.exit(0);
+       //System.exit(0);
    }
/**
@@ -138,10 +138,15 @@ public abstract class SimpleReinforcement {
            double reward = state2.get("oracle");
            double q;
            if(!done) {
                double expectedReward = amoeba.request(action);
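                // Bootstrapped future value: choose() proposes the next action, it is merged with the
                // observed next state, and AMOEBA's prediction for that pair is used as futureReward
                // in q = reward + gamma * futureReward - expectedReward.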
-               double futureAction = this.choose(state2Copy, null).get("a1")/20;
+               HashMap<String, Double> futureState = this.choose(state2Copy, null);
+               futureState.putAll(state2);
+               double futureReward = amoeba.request(futureState);
+               //double futureAction = this.choose(state2Copy, null).get("a1")/20;
-               q = reward + gamma * futureAction - expectedReward;
+               q = reward + gamma * futureReward - expectedReward;
            } else {
                q = reward;
            }
@@ -392,11 +397,12 @@ public abstract class SimpleReinforcement {
            action = new HashMap<String, Double>();
-           if(rand.nextDouble() < explo) {
-               action = agent.explore(state, env);
-           } else {
-               action = agent.choose(state, env);
-           }
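            // The agent now explores on every step; the epsilon-greedy selection is kept below, commented out.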
+           action = agent.explore(state, env);
+//         if(rand.nextDouble() < explo) {
+//             action = agent.explore(state, env);
+//         } else {
+//             action = agent.choose(state, env);
+//         }
            state2 = env.step(action); // new position with associated reward
@@ -31,7 +31,7 @@ import utils.XmlConfigGenerator;
public abstract class SimpleReinforcement2D {
    /* Learn and Test */
    public static final int MAX_STEP_PER_EPISODE = 200;
-   public static final int N_LEARN = 400;//400
+   public static final int N_LEARN = 100;//400
    public static final int N_TEST = 100;
    /* Exploration */
@@ -55,16 +55,15 @@ public class ContextRendererFX extends RenderStrategy {
        double b = 0.0;
        if(context.lastPrediction!=null) {
-           r = context.lastPrediction < -900 ? 1.0 : 0.0;
-           g = context.lastPrediction > 900 ? 1.0 : 0.0;
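            // The sign of the prediction selects the channel (negative -> red, positive -> green);
            // intensity scales with |lastPrediction|/1000 and is clamped to 1.0.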
+           r = context.lastPrediction < 0 ? Math.abs(context.lastPrediction)/1000 : 0.0;
+           g = context.lastPrediction > 0 ? context.lastPrediction/1000 : 0.0;
+           r = r > 1.0 ? 1.0 : r;
+           g = g > 1.0 ? 1.0 : g;
        }else {
            b = 1.0;
        }
-       if(Math.abs(context.lastPrediction)>900) {
-           System.out.println("---------------------------------------------" +context.getName() + " " + context.lastPrediction + " r " + r + " g " + g);
-       }
        drawable.setColor(new Color(r, g, b, 90d / 255d));
    }
@@ -34,7 +34,7 @@ public class World extends Environment {
-   public static TRACE_LEVEL minLevel = TRACE_LEVEL.DEBUG;
+   public static TRACE_LEVEL minLevel = TRACE_LEVEL.ERROR;
    private AMOEBA amoeba;