SMAC / Learning Group / AMOEBA3 · Commits

Commit 8125bfb7, authored 5 years ago by BrunoDatoMeneses
Parent: 3154c15c
Related merge request: !4 (Exp rein)

    ADD: reinforcement multiUI

Showing 1 changed file, +837 additions, −0 deletions:
AMOEBAonAMAK/src/experiments/reinforcement/SimpleReinforcement1DSpatialRewardAndActionMltiUI.java (new file, mode 100644)
package experiments.reinforcement;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;

import agents.context.localModel.TypeLocalModel;
import agents.percept.Percept;
import experiments.nDimensionsLaunchers.F_N_Manager;
import fr.irit.smac.amak.Configuration;
import fr.irit.smac.amak.tools.Log;
import fr.irit.smac.amak.ui.VUIMulti;
import fr.irit.smac.amak.ui.drawables.Drawable;
import gui.AmoebaMultiUIWindow;
import gui.AmoebaWindow;
import javafx.application.Application;
import javafx.application.Platform;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.scene.control.Slider;
import javafx.stage.Stage;
import kernel.AMOEBA;
import kernel.StudiedSystem;
import kernel.World;
import kernel.backup.BackupSystem;
import kernel.backup.IBackupSystem;
import kernel.backup.SaveHelperDummy;
import kernel.backup.SaveHelperImpl;
import utils.Pair;
import utils.RandomUtils;
import utils.TRACE_LEVEL;
import utils.XmlConfigGenerator;
/**
 * Train an amoeba on a simple reinforcement task.
 * The goal of the task is to get to the center. When the agent's position crosses 0, it gets a reward of 1000.
 * The agent can only move in 2 directions, by a distance of 1. Moving gives a reward of -1.
 * If the agent moves outside of the allowed range, it gets a reward of -1000.
 * @author Hugo
 *
 */
public abstract class SimpleReinforcement1DSpatialRewardAndActionMltiUI extends Application implements Serializable {

    AMOEBA amoebaSpatialReward;
    VUIMulti amoebaSpatialRewardVUI;
    AmoebaMultiUIWindow amoebaSpatialRewardUI;

    AMOEBA amoebaControlModel;
    VUIMulti amoebaControlModelVUI;
    AmoebaMultiUIWindow amoebaControlModelUI;

    /* Learn and Test */
    public static final int MAX_STEP_PER_EPISODE = 200;
    public static final int N_LEARN = 1000; //400
    public static final int N_TEST = 100;

    /* Exploration */
    public static final double MIN_EXPLO_RATE = 0.02;
    public static final double EXPLO_RATE_DIMINUTION_FACTOR = 0.01;
    public static final double EXPLO_RATE_BASE = 1;
    public static void main(String[] args) throws IOException {
        Application.launch(args);
    }
    @Override
    public void start(Stage arg0) throws Exception, IOException {

        Configuration.multiUI = true;
        Configuration.commandLineMode = false;
        Configuration.allowedSimultaneousAgentsExecution = 1;
        Configuration.waitForGUI = true;
        Configuration.plotMilliSecondsUpdate = 20000;

        amoebaSpatialRewardVUI = new VUIMulti("2D");
        amoebaSpatialRewardUI = new AmoebaMultiUIWindow("SPATIAL_REWARD", amoebaSpatialRewardVUI);

        amoebaControlModelVUI = new VUIMulti("2D");
        amoebaControlModelUI = new AmoebaMultiUIWindow("CONTROL_MODEL", amoebaControlModelVUI);

        //startTask(100, 1000);
    }
    public void startTask(long wait, int cycles) {
        // Create a Runnable
        Runnable task = new Runnable() {
            public void run() {
                runTask(wait, cycles);
            }
        };

        // Run the task in a background thread
        Thread backgroundThread = new Thread(task);
        // Terminate the running thread if the application exits
        backgroundThread.setDaemon(true);
        // Start the thread
        backgroundThread.start();
    }

    public void startTask2(long wait, int cycles) {
        // Create a Runnable
        Runnable task = new Runnable() {
            public void run() {
                runTask2(wait, cycles);
            }
        };

        // Run the task in a background thread
        Thread backgroundThread = new Thread(task);
        // Terminate the running thread if the application exits
        backgroundThread.setDaemon(true);
        // Start the thread
        backgroundThread.start();
    }
    public void runTask(long wait, int cycles) {
        try {
            // Update the Label on the JavaFx Application Thread
            Platform.runLater(new Runnable() {
                @Override
                public void run() {
                    ArrayList<ArrayList<Double>> results = new ArrayList<>();
                    //LearningAgent agent = new QLearning();
                    LearningAgent agent = new AmoebaQL();
                    //LearningAgent agent = new AmoebaCoop();
                    Environment env = new OneDimensionEnv(10);
                    results.add(learning(agent, env));

                    int nbEpisodes = results.get(0).size();
                    for (int i = 0; i < nbEpisodes; i++) {
                        double average = 0;
                        for (int j = 0; j < results.size(); j++) {
                            average += results.get(j).get(i);
                        }
                        average /= results.size();
                        System.out.println("" + i + "\t" + average);
                    }
                }
            });

            Thread.sleep(wait);
        }
        catch (InterruptedException e) {
            e.printStackTrace();
        }

//        for(int i = 0; i < cycles; i++)
//        {
//            try
//            {
//                // Get the Status
//                final String status = "Processing " + i + " of " + cycles;
//
//                // Update the Label on the JavaFx Application Thread
//                Platform.runLater(new Runnable()
//                {
//                    @Override
//                    public void run()
//                    {
//                        ///
//                    }
//                });
//
//                Thread.sleep(wait);
//            }
//            catch (InterruptedException e)
//            {
//                e.printStackTrace();
//            }
//        }
    }
    public void runTask2(long wait, int cycles) {
        try {
            // Update the Label on the JavaFx Application Thread
            Platform.runLater(new Runnable() {
                @Override
                public void run() {
                    ///
                }
            });

            Thread.sleep(wait);
        }
        catch (InterruptedException e) {
            e.printStackTrace();
        }

        for (int i = 0; i < cycles; i++) {
            try {
                // Get the Status
                final String status = "Processing " + i + " of " + cycles;

                // Update the Label on the JavaFx Application Thread
                Platform.runLater(new Runnable() {
                    @Override
                    public void run() {
                        ///
                    }
                });

                Thread.sleep(wait);
            }
            catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    @Override
    public void stop() throws Exception {
        super.stop();
        System.exit(0);
    }
    /**
     * An environment in which a LearningAgent resides
     * @author Hugo
     *
     */
    public interface Environment {
        public List<String> actionSpace();
        public List<String> perceptionSpace();
        public HashMap<String, Double> reset();
        public HashMap<String, Double> step(HashMap<String, Double> action);
        public HashMap<String, Double> randomAction();
    }
    /**
     * Wrapper for any kind of learning agent
     * @author Hugo
     *
     */
    public interface LearningAgent {
        public HashMap<String, Double> choose(HashMap<String, Double> state, Environment env);
        public HashMap<String, Double> explore(HashMap<String, Double> state, Environment env);
        public void learn(HashMap<String, Double> state, HashMap<String, Double> state2, HashMap<String, Double> action, boolean done);
    }
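    // Example (editor's sketch, not part of the original commit): the smallest possible
    // LearningAgent honouring the interface above. It acts randomly and never learns, which
    // makes it a convenient baseline against the AmoebaQL and QLearning implementations below.
    // The class name RandomAgent is hypothetical and does not appear elsewhere in the project.
    public static class RandomAgent implements LearningAgent {
        @Override
        public HashMap<String, Double> choose(HashMap<String, Double> state, Environment env) {
            // no learned policy: always delegate to the environment's random action
            return env.randomAction();
        }

        @Override
        public HashMap<String, Double> explore(HashMap<String, Double> state, Environment env) {
            return env.randomAction();
        }

        @Override
        public void learn(HashMap<String, Double> state, HashMap<String, Double> state2, HashMap<String, Double> action, boolean done) {
            // a random baseline does not update anything
        }
    }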
    /**
     * Compatible only with OneDimensionEnv
     * @author Hugo
     *
     */
    public static class AmoebaQL implements LearningAgent {
        public AMOEBA amoebaSpatialReward;
        //public AMOEBA amoebaControlModel;
        public double lr = 0.8;
        public double gamma = 0.9;
        private Random rand = new Random();

        public AmoebaQL() {
            amoebaSpatialReward = setupSpatialReward();
            //amoebaControlModel = setupControlModel();
        }

        @Override
        public HashMap<String, Double> choose(HashMap<String, Double> state, Environment env) {
//            HashMap<String, Double> stateWithVizuAdded = new HashMap<String, Double>(state);
//            stateWithVizuAdded.put("p2", 0.0);
//            stateWithVizuAdded.put("oracle", 0.0);
//            HashMap<String, Double> bestFuturePosition = amoebaSpatialReward.reinforcementRequest(stateWithVizuAdded);
//
//            HashMap<String, Double> action = new HashMap<String, Double>();
//            if(bestFuturePosition!=null) {
//                HashMap<String, Double> requestForControlModel = new HashMap<String, Double>();
//                requestForControlModel.put("pCurrent", state.get("p1"));
//                requestForControlModel.put("pGoal", bestFuturePosition.get("p1"));
//
//                double bestAction = amoebaControlModel.request(requestForControlModel);
//
//
//                action.put("a1", bestAction);
//            }
//            action = env.randomAction();
//
//            return action;
            return null;
        }

        @Override
        public void learn(HashMap<String, Double> state, HashMap<String, Double> positionAndReward, HashMap<String, Double> action, boolean done) {
            // state : previous position and associated reward
            // state2 : new position with current reward
            // action : previous state, current actions and current reward

            HashMap<String, Double> previousStateCurrentStateAction = new HashMap<>();
            previousStateCurrentStateAction.put("pCurrent", action.get("p1"));
            previousStateCurrentStateAction.put("pGoal", positionAndReward.get("p1"));
            previousStateCurrentStateAction.put("oracle", action.get("a1"));

            //System.out.println("ControlModel " + previousStateCurrentStateAction + " ---------------- SIMPLE REIN XP 149");
            //System.out.println("SpatialReward " + positionAndReward + " ---------------- SIMPLE REIN XP 149");

            amoebaSpatialReward.learn(positionAndReward);
            //amoebaControlModel.learn(previousStateCurrentStateAction);
        }

        @Override
        public HashMap<String, Double> explore(HashMap<String, Double> state, Environment env) {
            return env.randomAction();
        }
    }
    /**
     * Compatible only with OneDimensionEnv.<br/>
     * An extremely crude and quick implementation of Q learning.
     * Not expected to perform well, but should be better than random.
     * @author Hugo
     *
     */
    public static class QLearning implements LearningAgent {
        public double[][] Q = new double[102][2];
        public double lr = 0.8;
        public double gamma = 0.9;
        private Random rand = new Random();

        @Override
        public HashMap<String, Double> choose(HashMap<String, Double> state, Environment env) {
            int p = state.get("p1").intValue() + 50;
            double a;
            if (Q[p][0] == Q[p][1]) {
                a = rand.nextBoolean() ? -1 : 1;
            } else {
                a = Q[p][0] > Q[p][1] ? -1 : 1;
            }
            HashMap<String, Double> action = new HashMap<String, Double>();
            action.put("a1", a);
            return action;
        }

        @Override
        public void learn(HashMap<String, Double> state, HashMap<String, Double> state2, HashMap<String, Double> action, boolean done) {
            int p = state.get("p1").intValue() + 50;
            int p2 = state2.get("p1").intValue() + 50;
            int a = action.get("a1").intValue() == -1 ? 0 : 1;
            double reward = state2.get("oracle");

            double max = Double.NEGATIVE_INFINITY;
            if (!done) {
                for (Double v : Q[p2]) {
                    max = Math.max(max, v);
                }
            } else {
                max = reward;
            }
            //
            double q = reward + gamma * max - Q[p][a];
            Q[p][a] += lr * q;
        }

        @Override
        public HashMap<String, Double> explore(HashMap<String, Double> state, Environment env) {
            return env.randomAction();
        }
    }
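    // Note on QLearning.learn() above (editor's annotation, not in the original commit):
    // it applies the standard tabular update
    //     Q[p][a] += lr * (reward + gamma * max_a' Q[p2][a'] - Q[p][a])
    // with positions shifted by +50 so that p1 values index the 102-row Q table, and the two
    // columns encoding the actions a1 = -1 (column 0) and a1 = +1 (column 1). When an episode
    // ends (done == true), the bootstrap term is replaced by the final reward.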
    public static class OneDimensionEnv implements Environment {
        private Random rand = new Random();
        private double x = 0;
        private double reward = 0;
        private double size;
        private Drawable pos;

        public OneDimensionEnv(double envSize) {
            size = envSize;

            if (!Configuration.commandLineMode) {
                AmoebaWindow instance = AmoebaWindow.instance();
                //pos = new DrawableOval(0.5, 0.5, 1, 1);
                //pos.setColor(new Color(0.5, 0.0, 0.0, 0.5));
                //instance.mainVUI.add(pos);
                //instance.mainVUI.createAndAddRectangle(-50, -0.25, 100, 0.5);
                //instance.mainVUI.createAndAddRectangle(-0.25, -1, 0.5, 2);
                instance.point.hide();
                //instance.rectangle.hide();
            }
        }

        @Override
        public HashMap<String, Double> reset() {
            x = RandomUtils.nextDouble(rand, -size, Math.nextUp(size));
            x = Math.round(x);
            reward = 0.0;
            //pos.move(x+0.5, 0.5);

            HashMap<String, Double> ret = new HashMap<>();
            ret.put("p1", x);
            ret.put("oracle", reward);
            return ret;
        }

        @Override
        public HashMap<String, Double> step(HashMap<String, Double> actionMap) {
            double action = actionMap.get("a1");
            //if(action == 0.0) action = rand.nextDouble();
            if (action > 0.0) action = Math.ceil(action);
            if (action < 0.0) action = Math.floor(action);
            if (action > 1.0) action = 1.0;
            if (action < -1.0) action = -1.0;
            double oldX = x;
            x = x + action;

            //System.out.println("ACTIONS " + " a1 " +action + " " + " a2 " + action2);

            if (x < -size || x > size) {
                reward = -1000.0;
            } else if ((x == 0.0) || (sign(oldX) != sign(x))) {
                // win !
                reward = 1000.0;
            } else {
                reward = -1.0;
            }
            HashMap<String, Double> ret = new HashMap<>();
            ret.put("p1", x);
            ret.put("oracle", reward);
            //pos.move(x+0.5, 0.5);
            return ret;
        }

        @Override
        public List<String> actionSpace() {
            ArrayList<String> l = new ArrayList<>();
            l.add("a1 enum:true {-1, 0, 1}");
            return l;
        }

        @Override
        public List<String> perceptionSpace() {
            ArrayList<String> l = new ArrayList<>();
            l.add("p1 enum:false [-" + size + ", " + size + "]");
            return l;
        }

        @Override
        public HashMap<String, Double> randomAction() {
            double a1 = rand.nextBoolean() ? -1 : 1;
            HashMap<String, Double> action = new HashMap<String, Double>();
            action.put("a1", a1);
            return action;
        }
    }
    /**
     * Setup an amoeba for the SimpleReinforcement problem
     * @return
     */
    private static AMOEBA setup() {
        ArrayList<Pair<String, Boolean>> sensors = new ArrayList<>();
        sensors.add(new Pair<String, Boolean>("p1", false));
        File config;
        try {
            config = File.createTempFile("config", "xml");
            XmlConfigGenerator.makeXML(config, sensors);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
            return null; // now the compiler knows config is initialized
        }
        //File config = new File("resources/simpleReinManualTrained.xml");

        Log.defaultMinLevel = Log.Level.INFORM;
        World.minLevel = TRACE_LEVEL.ERROR;
        AMOEBA amoeba = new AMOEBA(null, null, config.getAbsolutePath(), null);
        amoeba.saver = new SaveHelperDummy();

        return amoeba;
    }
    private static AMOEBA setupSpatialReward() {
        ArrayList<Pair<String, Boolean>> sensors = new ArrayList<>();
        sensors.add(new Pair<String, Boolean>("p1", false));
        File config;
        try {
            config = File.createTempFile("configSpatialReward", "xml");
            XmlConfigGenerator.makeXML(config, sensors);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
            return null; // now the compiler knows config is initialized
        }
        //File config = new File("resources/simpleReinManualTrained.xml");

        Log.defaultMinLevel = Log.Level.INFORM;
        World.minLevel = TRACE_LEVEL.ERROR;
        AMOEBA amoeba = new AMOEBA(null, null, config.getAbsolutePath(), null);
        amoeba.saver = new SaveHelperDummy();
        amoeba.setLocalModel(TypeLocalModel.MILLER_REGRESSION);
        amoeba.getEnvironment().setMappingErrorAllowed(0.025);
        //amoeba.setReinforcement(true);

        return amoeba;
    }
    private static AMOEBA setupControlModel() {
        ArrayList<Pair<String, Boolean>> sensors = new ArrayList<>();
        sensors.add(new Pair<String, Boolean>("pCurrent", false));
        sensors.add(new Pair<String, Boolean>("pGoal", false));
        File config;
        try {
            config = File.createTempFile("configControlModel", "xml");
            XmlConfigGenerator.makeXML(config, sensors);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
            return null; // now the compiler knows config is initialized
        }
        //File config = new File("resources/simpleReinManualTrained.xml");

        Log.defaultMinLevel = Log.Level.INFORM;
        World.minLevel = TRACE_LEVEL.ERROR;
        AMOEBA amoeba = new AMOEBA(null, null, config.getAbsolutePath(), null);
        amoeba.saver = new SaveHelperDummy();
        amoeba.setLocalModel(TypeLocalModel.MILLER_REGRESSION);
        amoeba.getEnvironment().setMappingErrorAllowed(0.025);

        return amoeba;
    }
    /**
     * Teach a learning agent on the SimpleReinforcement problem
     * @param agent
     * @return
     */
    public static ArrayList<Double> learning(LearningAgent agent, Environment env) {
        ArrayList<Double> averageRewards = new ArrayList<Double>();
        Random rand = new Random();
        Random r = new Random();
        HashMap<String, Double> state = env.reset();
        HashMap<String, Double> state2;
        double explo = EXPLO_RATE_BASE;
        for (int i = 0; i < N_LEARN; i++) {
            int nbStep = 0;
            state = env.reset();
            HashMap<String, Double> action = new HashMap<String, Double>();
            double totReward = 0.0;

            // execute simulation cycles
            boolean done = false;
            boolean invalid = false;
            while (!done && !invalid) {
                nbStep++;
                if (nbStep > MAX_STEP_PER_EPISODE) {
                    invalid = true;
                }
                state.remove("oracle");
                action = new HashMap<String, Double>();

                action = agent.explore(state, env);
//                if(rand.nextDouble() < explo) {
//                    action = agent.explore(state, env);
//                } else {
//                    action = agent.choose(state, env);
//                }

                state2 = env.step(action); // new position with associated reward

                if (state2.get("oracle") != -1.0) { //if goal or end of world
                    done = true;
                }

                action.put("p1", state.get("p1")); //add previous state to action
                action.put("oracle", state2.get("oracle")); //add current reward to action

                // state : previous position and associated reward
                // state2 : new position with current reward
                // action : previous state, current action and current reward
                agent.learn(state, state2, action, done);
                totReward += action.get("oracle");

                state = state2;
            }

            System.out.println("-----------------------------------------------------------------------");

            // update exploration rate
            if (explo > MIN_EXPLO_RATE) {
                explo -= EXPLO_RATE_DIMINUTION_FACTOR;
                if (explo < MIN_EXPLO_RATE)
                    explo = MIN_EXPLO_RATE;
            }

            System.out.println("Episode " + i + " reward : " + totReward + " explo : " + explo);
            //double testAR = test(agent, env, r, N_TEST);
            //averageRewards.add(testAR);

            //Scanner scan = new Scanner(System.in);
            //scan.nextLine();
        }

        return averageRewards;
    }
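    // Editor's note (not in the original commit): in learning() above the epsilon-greedy branch
    // is commented out, so the agent currently explores on every step. The decay schedule is
    // still maintained: explo starts at EXPLO_RATE_BASE (1.0), shrinks by
    // EXPLO_RATE_DIMINUTION_FACTOR (0.01) after each episode, and is floored at
    // MIN_EXPLO_RATE (0.02). A headless run could look like the sketch below, assuming
    // Configuration.commandLineMode has been set to true so OneDimensionEnv skips the AmoebaWindow:
    //     LearningAgent agent = new QLearning();
    //     Environment env = new OneDimensionEnv(50);
    //     learning(agent, env);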
    private static double test(LearningAgent agent, Environment env, Random r, int nbTest) {
        HashMap<String, Double> state;
        HashMap<String, Double> state2;
        double nbPositiveReward = 0.0;
        double tot_reward = 0.0;
        for (int i = 0; i < nbTest; i++) {
            double reward = 0.0;
            state = env.reset();

            // execute simulation cycles
            boolean done = false;
            int nbStep = 0;
            while (!done) {
                nbStep++;
                if (nbStep > 200) {
                    done = true;
                }
                state.remove("oracle");
                HashMap<String, Double> a = agent.choose(state, env);
                state2 = env.step(a);

                if (state2.get("oracle") != -1.0) {
                    done = true;
                }

                reward += state2.get("oracle");

                state = state2;
            }
            if (reward > 0) {
                nbPositiveReward += 1.0;
            }
            tot_reward += reward;
        }
        double averageReward = tot_reward / nbTest;
        System.out.println("Test average reward : " + averageReward + " Positive reward %: " + (nbPositiveReward / nbTest));

        return averageReward;
    }
    /**
     * This is a proof of concept, showing that if the amoeba learns the correct model of the reward,
     * it can produce a good solution.
     * The expected average reward for the optimal solution is 75.
     * The main cause of negative reward is an infinite loop (usually near the objective). In such cases, the reward is -200.
     */
    public static void poc(boolean learnMalus) {
        AMOEBA amoeba = setup();
        Environment env = new OneDimensionEnv(50);

        // train
        for (double n = 0.0; n < 0.5; n += 0.1) {
            double pos = 50.0 - n;
            for (int i = 0; i < 49; i++) {
                double reward = 100 - Math.abs(pos);
                HashMap<String, Double> action = new HashMap<String, Double>();
                action.put("p1", pos);
                action.put("a1", -1.0);
                action.put("oracle", reward);
                amoeba.learn(action);

                if (learnMalus) {
                    reward = -150 + Math.abs(pos);
                    action.put("a1", 1.0);
                    action.put("oracle", reward);
                    amoeba.learn(action);
                }

                pos -= 1.0;
            }

            pos = -50.0 - n;
            for (int i = 0; i < 49; i++) {
                double reward = 100 - Math.abs(pos);
                HashMap<String, Double> action = new HashMap<String, Double>();
                action.put("p1", pos);
                action.put("a1", 1.0);
                action.put("oracle", reward);
                amoeba.learn(action);

                if (learnMalus) {
                    reward = -150 + Math.abs(pos);
                    action.put("a1", -1.0);
                    action.put("oracle", reward);
                    amoeba.learn(action);
                }

                pos += 1.0;
            }
        }

        // tests
        Random r = new Random();
        HashMap<String, Double> state = env.reset();
        HashMap<String, Double> state2;
        double tot_reward = 0.0;
        int nbTest = 100;
        double nbPositiveReward = 0;
        for (int i = 0; i < nbTest; i++) {
            double reward = 0.0;
            state = env.reset();
            HashMap<String, Double> action = new HashMap<String, Double>();

            // execute simulation cycles
            boolean done = false;
            int nbStep = 0;
            while (!done) {
                nbStep++;
                if (nbStep > 200) {
                    done = true;
                }
                state.remove("oracle");
                action = amoeba.maximize(state);
                // random action if no proposition from amoeba
                if (action.get("oracle").equals(Double.NEGATIVE_INFINITY)) {
                    action.put("a1", (r.nextBoolean() ? 1.0 : -1.0));
                }
                //System.out.println("action "+action);
                state2 = env.step(action);

                if (state2.get("oracle") != -1.0) {
                    done = true;
                }

                reward += state2.get("oracle");
                //System.out.println("state2 "+state2+" reward "+reward);

                state = state2;
            }
            if (reward > 0) {
                nbPositiveReward += 1.0;
            }
            tot_reward += reward;
            //System.out.println("-----------------------------\nTot reward "+tot_reward+"\n-----------------------------");
        }
        System.out.println("Average reward : " + tot_reward / nbTest + " Positive reward %: " + (nbPositiveReward / nbTest));
    }
    private static int sign(double x) {
        return x < 0 ? -1 : 1;
    }
}