Commit 8f18ffc5 authored by BrunoDatoMeneses

ADD: reward propagation with neighbors

parent 061af960
1 merge request: !4 Exp rein
@@ -774,6 +774,17 @@ public class Context extends AmoebaAgent {
        return exp;
    }
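    /** Builds an artificial experiment located at the center of this context's ranges (one value per percept). */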
+    public Experiment getArtificialExperiment() {
+        ArrayList<Percept> percepts = getAmas().getPercepts();
+        Experiment exp = new Experiment(this);
+        for (Percept pct : percepts) {
+            exp.addDimension(pct, this.getRanges().get(pct).getCenter());
+        }
+        return exp;
+    }

    public double sumOfRangesLengths() {
        double sum = 0;
@@ -1397,6 +1408,7 @@ public class Context extends AmoebaAgent {
        s += "Model "+this.localModel.getType()+" :";
        s += this.localModel.getCoefsFormula() + "\n";
        s += "Last Predicition " + lastPrediction +"\n";
+       s += "\n";
        s += "Ranges :\n";
@@ -13,6 +13,7 @@ import java.util.Queue;
import agents.AmoebaAgent;
import agents.context.Context;
import agents.context.CustomComparator;
+import agents.context.Experiment;
import agents.percept.Percept;
import kernel.AMOEBA;
import ncs.NCS;
@@ -180,6 +181,8 @@ public class Head extends AmoebaAgent {
        //NCSDetection_PotentialRequest();
+       //predictionPropagation();
        criticalities.addCriticality("spatialCriticality",
                (getMinMaxVolume() - getVolumeOfAllContexts()) / getMinMaxVolume());
@@ -692,6 +695,38 @@ public class Head extends AmoebaAgent {
    }
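    /**
     * Propagates the best context's prediction to its activated neighbors: any neighbor whose
     * last prediction differs from the best context's by more than 10 has its local model
     * updated with the best context's current experiment (learning weight 0.5), and its
     * last prediction is then refreshed from its own action proposal.
     */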
+    private void predictionPropagation() {
+        getEnvironment().trace(TRACE_LEVEL.DEBUG, new ArrayList<String>(Arrays.asList("------------------------------------------------------------------------------------"
+                + "---------------------------------------- PREDICTION PROPAGATION")));
+        if(bestContext != null) {
+            for(Context ctxt : activatedNeighborsContexts) {
+                if(ctxt != bestContext) {
+                    if(Math.abs(ctxt.lastPrediction - bestContext.lastPrediction)>10) {
+                        Experiment propagationExperiment = bestContext.getCurrentExperiment();
+                        propagationExperiment.setOracleProposition(bestContext.lastPrediction);
+                        ctxt.getLocalModel().updateModel(propagationExperiment, 0.5);
+                        ctxt.lastPrediction = ctxt.getActionProposal();
+                    }
+                }
+            }
+        }
+    }
@@ -894,7 +929,7 @@ public class Head extends AmoebaAgent {
        double minDistanceToOraclePrediction = Double.POSITIVE_INFINITY;
        for (Context activatedContext : activatedContexts) {
-           System.out.println(activatedContext.getName());
+           //System.out.println(activatedContext.getName());
            currentDistanceToOraclePrediction = activatedContext.getLocalModel()
                    .distance(activatedContext.getCurrentExperiment());
            getAmas().data.distanceToRegression = currentDistanceToOraclePrediction;
@@ -31,7 +31,7 @@ import utils.XmlConfigGenerator;
public abstract class SimpleReinforcement {
    /* Learn and Test */
    public static final int MAX_STEP_PER_EPISODE = 200;
-   public static final int N_LEARN = 400;
+   public static final int N_LEARN = 100;
    public static final int N_TEST = 100;
    /* Exploration */
@@ -50,7 +50,7 @@ public abstract class SimpleReinforcement {
        learning(new QLearning());
        System.out.println("----- END QLEARNING -----");*/
        ArrayList<ArrayList<Double>> results = new ArrayList<>();
-       for(int i = 0; i < 100; i++) {
+       for(int i = 0; i < 1; i++) {
            //LearningAgent agent = new QLearning();
            LearningAgent agent = new AmoebaQL();
            //LearningAgent agent = new AmoebaCoop();
@@ -69,7 +69,7 @@ public abstract class SimpleReinforcement {
            System.out.println(""+i+"\t"+average);
        }
-       System.exit(0);
+       //System.exit(0);
    }
/**
@@ -138,10 +138,15 @@ public abstract class SimpleReinforcement {
            double reward = state2.get("oracle");
            double q;
            if(!done) {
                double expectedReward = amoeba.request(action);
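                // Bootstrapped future value: choose() proposes the next action, it is merged with the
                // observed next state, and AMOEBA's prediction for that pair is used as futureReward
                // in q = reward + gamma * futureReward - expectedReward.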
-               double futureAction = this.choose(state2Copy, null).get("a1")/20;
+               HashMap<String, Double> futureState = this.choose(state2Copy, null);
+               futureState.putAll(state2);
+               double futureReward = amoeba.request(futureState);
+               //double futureAction = this.choose(state2Copy, null).get("a1")/20;
-               q = reward + gamma * futureAction - expectedReward;
+               q = reward + gamma * futureReward - expectedReward;
            } else {
                q = reward;
            }
@@ -392,11 +397,12 @@ public abstract class SimpleReinforcement {
            action = new HashMap<String, Double>();
-           if(rand.nextDouble() < explo) {
-               action = agent.explore(state, env);
-           } else {
-               action = agent.choose(state, env);
-           }
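            // The agent now explores on every step; the epsilon-greedy selection is kept below, commented out.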
+           action = agent.explore(state, env);
+//         if(rand.nextDouble() < explo) {
+//             action = agent.explore(state, env);
+//         } else {
+//             action = agent.choose(state, env);
+//         }
            state2 = env.step(action); // new position with associated reward
@@ -31,7 +31,7 @@ import utils.XmlConfigGenerator;
public abstract class SimpleReinforcement2D {
    /* Learn and Test */
    public static final int MAX_STEP_PER_EPISODE = 200;
-   public static final int N_LEARN = 400;//400
+   public static final int N_LEARN = 100;//400
    public static final int N_TEST = 100;
    /* Exploration */
@@ -55,16 +55,15 @@ public class ContextRendererFX extends RenderStrategy {
        double b = 0.0;
        if(context.lastPrediction!=null) {
-           r = context.lastPrediction < -900 ? 1.0 : 0.0;
-           g = context.lastPrediction > 900 ? 1.0 : 0.0;
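            // The sign of the prediction selects the channel (negative -> red, positive -> green);
            // intensity scales with |lastPrediction|/1000 and is clamped to 1.0.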
+           r = context.lastPrediction < 0 ? Math.abs(context.lastPrediction)/1000 : 0.0;
+           g = context.lastPrediction > 0 ? context.lastPrediction/1000 : 0.0;
+           r = r > 1.0 ? 1.0 : r;
+           g = g > 1.0 ? 1.0 : g;
        }else {
            b = 1.0;
        }
-       if(Math.abs(context.lastPrediction)>900) {
-           System.out.println("---------------------------------------------" +context.getName() + " " + context.lastPrediction + " r " + r + " g " + g);
-       }
        drawable.setColor(new Color(r, g, b, 90d / 255d));
    }
@@ -34,7 +34,7 @@ public class World extends Environment {
-   public static TRACE_LEVEL minLevel = TRACE_LEVEL.DEBUG;
+   public static TRACE_LEVEL minLevel = TRACE_LEVEL.ERROR;
    private AMOEBA amoeba;